Bug Summary

File: ufs/ffs/ffs_softdep.c
Warning: line 639, column 2
Value stored to 'loopcount' is never read
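
This is a dead-store report: the deadcode.DeadStores checker flags an
assignment whose result can never be observed on any later execution path.
A minimal sketch of the pattern in KNF-style C (hypothetical code, not from
this file):

	int
	f(void)
	{
		int n;

		n = 1;		/* dead store: overwritten before any read */
		n = 2;
		return (n);
	}

The usual fix is to drop the unread store, or to move the assignment to
where the value is actually consumed.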

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ffs_softdep.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc 
-fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/ufs/ffs/ffs_softdep.c
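
The "Value stored to ... is never read" diagnostic above is produced by the
DeadStores checker, enabled in this invocation as part of
-analyzer-checker=deadcode.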
1/* $OpenBSD: ffs_softdep.c,v 1.150 2021/04/28 09:53:53 claudio Exp $ */
2
3/*
4 * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
5 *
6 * The soft updates code is derived from the appendix of a University
7 * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
8 * "Soft Updates: A Solution to the Metadata Update Problem in File
9 * Systems", CSE-TR-254-95, August 1995).
10 *
11 * Further information about soft updates can be obtained from:
12 *
13 * Marshall Kirk McKusick http://www.mckusick.com/softdep/
14 * 1614 Oxford Street mckusick@mckusick.com
15 * Berkeley, CA 94709-1608 +1-510-843-9542
16 * USA
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 *
22 * 1. Redistributions of source code must retain the above copyright
23 * notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 * notice, this list of conditions and the following disclaimer in the
26 * documentation and/or other materials provided with the distribution.
27 *
28 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
29 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
30 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
31 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
32 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00
41 * $FreeBSD: src/sys/ufs/ffs/ffs_softdep.c,v 1.86 2001/02/04 16:08:18 phk Exp $
42 */
43
44#include <sys/param.h>
45#include <sys/buf.h>
46#include <sys/kernel.h>
47#include <sys/malloc.h>
48#include <sys/mount.h>
49#include <sys/proc.h>
50#include <sys/pool.h>
51#include <sys/syslog.h>
52#include <sys/systm.h>
53#include <sys/vnode.h>
54#include <sys/specdev.h>
55#include <crypto/siphash.h>
56#include <ufs/ufs/dir.h>
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufsmount.h>
60#include <ufs/ffs/fs.h>
61#include <ufs/ffs/softdep.h>
62#include <ufs/ffs/ffs_extern.h>
63#include <ufs/ufs/ufs_extern.h>
64
65#define STATIC
66
67/*
68 * Mapping of dependency structure types to malloc types.
69 */
70#define D_PAGEDEP	0
71#define D_INODEDEP	1
72#define D_NEWBLK	2
73#define D_BMSAFEMAP	3
74#define D_ALLOCDIRECT	4
75#define D_INDIRDEP	5
76#define D_ALLOCINDIR	6
77#define D_FREEFRAG	7
78#define D_FREEBLKS	8
79#define D_FREEFILE	9
80#define D_DIRADD	10
81#define D_MKDIR		11
82#define D_DIRREM	12
83#define D_NEWDIRBLK	13
84#define D_LAST		13
85/*
86 * Names of softdep types.
87 */
88const char *softdep_typenames[] = {
89 "pagedep",
90 "inodedep",
91 "newblk",
92 "bmsafemap",
93 "allocdirect",
94 "indirdep",
95 "allocindir",
96 "freefrag",
97 "freeblks",
98 "freefile",
99 "diradd",
100 "mkdir",
101 "dirrem",
102 "newdirblk",
103};
104#define TYPENAME(type)	\
105	((unsigned)(type) <= D_LAST ? softdep_typenames[type] : "???")
106/*
107 * Finding the current process.
108 */
109#define CURPROC curproc
110/*
111 * End system adaptation definitions.
112 */
113
114/*
115 * Internal function prototypes.
116 */
117STATIC void softdep_error(char *, int);
118STATIC void drain_output(struct vnode *, int);
119STATIC int getdirtybuf(struct buf *, int);
120STATIC void clear_remove(struct proc *);
121STATIC void clear_inodedeps(struct proc *);
122STATIC int flush_pagedep_deps(struct vnode *, struct mount *,
123 struct diraddhd *);
124STATIC int flush_inodedep_deps(struct fs *, ufsino_t);
125STATIC int handle_written_filepage(struct pagedep *, struct buf *);
126STATIC void diradd_inode_written(struct diradd *, struct inodedep *);
127STATIC int handle_written_inodeblock(struct inodedep *, struct buf *);
128STATIC void handle_allocdirect_partdone(struct allocdirect *);
129STATIC void handle_allocindir_partdone(struct allocindir *);
130STATIC void initiate_write_filepage(struct pagedep *, struct buf *);
131STATIC void handle_written_mkdir(struct mkdir *, int);
132STATIC void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
133#ifdef FFS2
134STATIC void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
135#endif
136STATIC void handle_workitem_freefile(struct freefile *);
137STATIC void handle_workitem_remove(struct dirrem *);
138STATIC struct dirrem *newdirrem(struct buf *, struct inode *,
139 struct inode *, int, struct dirrem **);
140STATIC void free_diradd(struct diradd *);
141STATIC void free_allocindir(struct allocindir *, struct inodedep *);
142STATIC void free_newdirblk(struct newdirblk *);
143STATIC int indir_trunc(struct inode *, daddr_t, int, daddr_t, long *);
144STATIC void deallocate_dependencies(struct buf *, struct inodedep *);
145STATIC void free_allocdirect(struct allocdirectlst *,
146 struct allocdirect *, int);
147STATIC int check_inode_unwritten(struct inodedep *);
148STATIC int free_inodedep(struct inodedep *);
149STATIC void handle_workitem_freeblocks(struct freeblks *);
150STATIC void merge_inode_lists(struct inodedep *);
151STATIC void setup_allocindir_phase2(struct buf *, struct inode *,
152 struct allocindir *);
153STATIC struct allocindir *newallocindir(struct inode *, int, daddr_t,
154 daddr_t);
155STATIC void handle_workitem_freefrag(struct freefrag *);
156STATIC struct freefrag *newfreefrag(struct inode *, daddr_t, long);
157STATIC void allocdirect_merge(struct allocdirectlst *,
158 struct allocdirect *, struct allocdirect *);
159STATIC struct bmsafemap *bmsafemap_lookup(struct buf *);
160STATIC int newblk_lookup(struct fs *, daddr_t, int,
161 struct newblk **);
162STATIC int inodedep_lookup(struct fs *, ufsino_t, int, struct inodedep **);
163STATIC int pagedep_lookup(struct inode *, daddr_t, int, struct pagedep **);
164STATIC void pause_timer(void *);
165STATIC int request_cleanup(int, int);
166STATIC int process_worklist_item(struct mount *, int *, int);
167STATIC void add_to_worklist(struct worklist *);
168
169/*
170 * Exported softdep operations.
171 */
172void softdep_disk_io_initiation(struct buf *);
173void softdep_disk_write_complete(struct buf *);
174void softdep_deallocate_dependencies(struct buf *);
175void softdep_move_dependencies(struct buf *, struct buf *);
176int softdep_count_dependencies(struct buf *bp, int, int);
177
178/*
179 * Locking primitives.
180 *
181 * For a uniprocessor, all we need to do is protect against disk
182 * interrupts. For a multiprocessor, this lock would have to be
183 * a mutex. A single mutex is used throughout this file, though
184 * finer grain locking could be used if contention warranted it.
185 *
186 * For a multiprocessor, the sleep call would accept a lock and
187 * release it after the sleep processing was complete. In a uniprocessor
188 * implementation there is no such interlock, so we simply mark
189 * the places where it needs to be done with the `interlocked' form
190 * of the lock calls. Since the uniprocessor sleep already interlocks
191 * the spl, there is nothing that really needs to be done.
192 */
193#ifndef /* NOT */ DEBUG
194STATIC struct lockit {
195 int lkt_spl;
196} lk = { 0 };
197#define ACQUIRE_LOCK(lk)		(lk)->lkt_spl = splbio()
198#define FREE_LOCK(lk)			splx((lk)->lkt_spl)
199#define ACQUIRE_LOCK_INTERLOCKED(lk,s)	(lk)->lkt_spl = (s)
200#define FREE_LOCK_INTERLOCKED(lk)	((lk)->lkt_spl)
201
202#else /* DEBUG */
203STATIC struct lockit {
204 int lkt_spl;
205 pid_t lkt_held;
206 int lkt_line;
207} lk = { 0, -1 };
208STATIC int lockcnt;
209
210STATIC void acquire_lock(struct lockit *, int);
211STATIC void free_lock(struct lockit *, int);
212STATIC void acquire_lock_interlocked(struct lockit *, int, int);
213STATIC int free_lock_interlocked(struct lockit *, int);
214
215#define ACQUIRE_LOCK(lk)		acquire_lock(lk, __LINE__)
216#define FREE_LOCK(lk)			free_lock(lk, __LINE__)
217#define ACQUIRE_LOCK_INTERLOCKED(lk,s)	acquire_lock_interlocked(lk, (s), __LINE__)
218#define FREE_LOCK_INTERLOCKED(lk)	free_lock_interlocked(lk, __LINE__)
219
220STATIC void
221acquire_lock(struct lockit *lk, int line)
222{
223 pid_t holder;
224 int original_line;
225
226 if (lk->lkt_held != -1) {
227 holder = lk->lkt_held;
228 original_line = lk->lkt_line;
229		FREE_LOCK(lk);
230		if (holder == CURPROC->p_tid)
231 panic("softdep_lock: locking against myself, acquired at line %d, relocked at line %d", original_line, line);
232 else
233 panic("softdep_lock: lock held by %d, acquired at line %d, relocked at line %d", holder, original_line, line);
234 }
235	lk->lkt_spl = splbio();
236	lk->lkt_held = CURPROC->p_tid;
237 lk->lkt_line = line;
238 lockcnt++;
239}
240
241STATIC void
242free_lock(struct lockit *lk, int line)
243{
244
245 if (lk->lkt_held == -1)
246 panic("softdep_unlock: lock not held at line %d", line);
247 lk->lkt_held = -1;
248	splx(lk->lkt_spl);
249}
250
251STATIC void
252acquire_lock_interlocked(struct lockit *lk, int s, int line)
253{
254 pid_t holder;
255 int original_line;
256
257 if (lk->lkt_held != -1) {
258 holder = lk->lkt_held;
259 original_line = lk->lkt_line;
260		FREE_LOCK_INTERLOCKED(lk);
261		if (holder == CURPROC->p_tid)
262 panic("softdep_lock: locking against myself, acquired at line %d, relocked at line %d", original_line, line);
263 else
264 panic("softdep_lock: lock held by %d, acquired at line %d, relocked at line %d", holder, original_line, line);
265 }
266	lk->lkt_held = CURPROC->p_tid;
267 lk->lkt_line = line;
268 lk->lkt_spl = s;
269 lockcnt++;
270}
271
272STATIC int
273free_lock_interlocked(struct lockit *lk, int line)
274{
275
276 if (lk->lkt_held == -1)
277 panic("softdep_unlock_interlocked: lock not held at line %d", line);
278 lk->lkt_held = -1;
279
280 return (lk->lkt_spl);
281}
282#endif /* DEBUG */
283
284/*
285 * Place holder for real semaphores.
286 */
287struct sema {
288 int value;
289 pid_t holder;
290 char *name;
291 int prio;
292};
293STATIC void sema_init(struct sema *, char *, int);
294STATIC int sema_get(struct sema *, struct lockit *);
295STATIC void sema_release(struct sema *);
296
297STATIC void
298sema_init(struct sema *semap, char *name, int prio)
299{
300
301 semap->holder = -1;
302 semap->value = 0;
303 semap->name = name;
304 semap->prio = prio;
305}
306
307STATIC int
308sema_get(struct sema *semap, struct lockit *interlock)
309{
310 int s;
311
312 if (semap->value++ > 0) {
313		if (interlock != NULL)
314			s = FREE_LOCK_INTERLOCKED(interlock);
315		tsleep_nsec(semap, semap->prio, semap->name, INFSLP);
316		if (interlock != NULL) {
317			ACQUIRE_LOCK_INTERLOCKED(interlock, s);
318			FREE_LOCK(interlock);
319 }
320 return (0);
321 }
322	semap->holder = CURPROC->p_tid;
323	if (interlock != NULL)
324		FREE_LOCK(interlock);
325 return (1);
326}
327
328STATIC void
329sema_release(struct sema *semap)
330{
331
332	if (semap->value <= 0 || semap->holder != CURPROC->p_tid) {
333#ifdef DEBUG
334 if (lk.lkt_held != -1)
335			FREE_LOCK(&lk);
336#endif
337 panic("sema_release: not held");
338 }
339 if (--semap->value > 0) {
340 semap->value = 0;
341 wakeup(semap);
342 }
343 semap->holder = -1;
344}
345
346/*
347 * Memory management.
348 */
349STATIC struct pool pagedep_pool;
350STATIC struct pool inodedep_pool;
351STATIC struct pool newblk_pool;
352STATIC struct pool bmsafemap_pool;
353STATIC struct pool allocdirect_pool;
354STATIC struct pool indirdep_pool;
355STATIC struct pool allocindir_pool;
356STATIC struct pool freefrag_pool;
357STATIC struct pool freeblks_pool;
358STATIC struct pool freefile_pool;
359STATIC struct pool diradd_pool;
360STATIC struct pool mkdir_pool;
361STATIC struct pool dirrem_pool;
362STATIC struct pool newdirblk_pool;
363
364static __inline void
365softdep_free(struct worklist *item, int type)
366{
367
368 switch (type) {
369	case D_PAGEDEP:
370		pool_put(&pagedep_pool, item);
371		break;
372
373	case D_INODEDEP:
374		pool_put(&inodedep_pool, item);
375		break;
376
377	case D_BMSAFEMAP:
378		pool_put(&bmsafemap_pool, item);
379		break;
380
381	case D_ALLOCDIRECT:
382		pool_put(&allocdirect_pool, item);
383		break;
384
385	case D_INDIRDEP:
386		pool_put(&indirdep_pool, item);
387		break;
388
389	case D_ALLOCINDIR:
390		pool_put(&allocindir_pool, item);
391		break;
392
393	case D_FREEFRAG:
394		pool_put(&freefrag_pool, item);
395		break;
396
397	case D_FREEBLKS:
398		pool_put(&freeblks_pool, item);
399		break;
400
401	case D_FREEFILE:
402		pool_put(&freefile_pool, item);
403		break;
404
405	case D_DIRADD:
406		pool_put(&diradd_pool, item);
407		break;
408
409	case D_MKDIR:
410		pool_put(&mkdir_pool, item);
411		break;
412
413	case D_DIRREM:
414		pool_put(&dirrem_pool, item);
415		break;
416
417	case D_NEWDIRBLK:
418		pool_put(&newdirblk_pool, item);
419		break;
420
421	default:
422#ifdef DEBUG
423		if (lk.lkt_held != -1)
424			FREE_LOCK(&lk);
425#endif
426 panic("softdep_free: unknown type %d", type);
427 }
428}
429
430struct workhead softdep_freequeue;
431
432static __inline void
433softdep_freequeue_add(struct worklist *item)
434{
435 int s;
436
437	s = splbio();
438	LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list);
439	splx(s);
440}
441
442static __inline void
443softdep_freequeue_process(void)
444{
445 struct worklist *wk;
446
447	splassert(IPL_BIO);
448
449	while ((wk = LIST_FIRST(&softdep_freequeue)) != NULL) {
450		LIST_REMOVE(wk, wk_list);
451		FREE_LOCK(&lk);
452		softdep_free(wk, wk->wk_type);
453		ACQUIRE_LOCK(&lk);
454 }
455}
456
457/*
458 * Worklist queue management.
459 * These routines require that the lock be held.
460 */
461#ifndef /* NOT */ DEBUG
462#define WORKLIST_INSERT(head, item) do {	\
463	(item)->wk_state |= ONWORKLIST;		\
464	LIST_INSERT_HEAD(head, item, wk_list);	\
465} while (0)
466#define WORKLIST_REMOVE(item) do {		\
467	(item)->wk_state &= ~ONWORKLIST;	\
468	LIST_REMOVE(item, wk_list);		\
469} while (0)
470#define WORKITEM_FREE(item, type) softdep_freequeue_add((struct worklist *)item)
471
472#else /* DEBUG */
473STATIC void worklist_insert(struct workhead *, struct worklist *);
474STATIC void worklist_remove(struct worklist *);
475STATIC void workitem_free(struct worklist *);
476
477#define WORKLIST_INSERT(head, item) worklist_insert(head, item)
478#define WORKLIST_REMOVE(item) worklist_remove(item)
479#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item)
480
481STATIC void
482worklist_insert(struct workhead *head, struct worklist *item)
483{
484
485 if (lk.lkt_held == -1)
486 panic("worklist_insert: lock not held");
487	if (item->wk_state & ONWORKLIST) {
488		FREE_LOCK(&lk);
489		panic("worklist_insert: already on list");
490	}
491	item->wk_state |= ONWORKLIST;
492	LIST_INSERT_HEAD(head, item, wk_list);
493}
494
495STATIC void
496worklist_remove(struct worklist *item)
497{
498
499 if (lk.lkt_held == -1)
500 panic("worklist_remove: lock not held");
501	if ((item->wk_state & ONWORKLIST) == 0) {
502		FREE_LOCK(&lk);
503		panic("worklist_remove: not on list");
504	}
505	item->wk_state &= ~ONWORKLIST;
506	LIST_REMOVE(item, wk_list);
507}
508
509STATIC void
510workitem_free(struct worklist *item)
511{
512
513	if (item->wk_state & ONWORKLIST) {
514		if (lk.lkt_held != -1)
515			FREE_LOCK(&lk);
516 panic("workitem_free: still on list");
517 }
518 softdep_freequeue_add(item);
519}
520#endif /* DEBUG */
521
522/*
523 * Workitem queue management
524 */
525STATIC struct workhead softdep_workitem_pending;
526STATIC struct worklist *worklist_tail;
527STATIC int num_on_worklist; /* number of worklist items to be processed */
528STATIC int softdep_worklist_busy; /* 1 => trying to do unmount */
529STATIC int softdep_worklist_req; /* serialized waiters */
530STATIC int max_softdeps; /* maximum number of structs before slowdown */
531STATIC int tickdelay = 2; /* number of ticks to pause during slowdown */
532STATIC int proc_waiting; /* tracks whether we have a timeout posted */
533STATIC int *stat_countp; /* statistic to count in proc_waiting timeout */
534STATIC struct timeout proc_waiting_timeout;
535STATIC struct proc *filesys_syncer; /* proc of filesystem syncer process */
536STATIC int req_clear_inodedeps; /* syncer process flush some inodedeps */
537#define FLUSH_INODES	1
538STATIC int req_clear_remove; /* syncer process flush some freeblks */
539#define FLUSH_REMOVE	2
540/*
541 * runtime statistics
542 */
543STATIC int stat_worklist_push; /* number of worklist cleanups */
544STATIC int stat_blk_limit_push; /* number of times block limit neared */
545STATIC int stat_ino_limit_push; /* number of times inode limit neared */
546STATIC int stat_blk_limit_hit; /* number of times block slowdown imposed */
547STATIC int stat_ino_limit_hit; /* number of times inode slowdown imposed */
548STATIC int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */
549STATIC int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */
550STATIC int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */
551STATIC int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
552STATIC int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
553
554/*
555 * Add an item to the end of the work queue.
556 * This routine requires that the lock be held.
557 * This is the only routine that adds items to the list.
558 * The following routine is the only one that removes items
559 * and does so in order from first to last.
560 */
561STATIC void
562add_to_worklist(struct worklist *wk)
563{
564
565	if (wk->wk_state & ONWORKLIST) {
566#ifdef DEBUG
567		if (lk.lkt_held != -1)
568			FREE_LOCK(&lk);
569#endif
570		panic("add_to_worklist: already on list");
571	}
572	wk->wk_state |= ONWORKLIST;
573	if (LIST_FIRST(&softdep_workitem_pending) == NULL)
574		LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list);
575	else
576		LIST_INSERT_AFTER(worklist_tail, wk, wk_list);
577 worklist_tail = wk;
578 num_on_worklist += 1;
579}
580
581/*
582 * Process that runs once per second to handle items in the background queue.
583 *
584 * Note that we ensure that everything is done in the order in which they
585 * appear in the queue. The code below depends on this property to ensure
586 * that blocks of a file are freed before the inode itself is freed. This
587 * ordering ensures that no new <vfsid, inum, lbn> triples will be generated
588 * until all the old ones have been purged from the dependency lists.
589 */
590int
591softdep_process_worklist(struct mount *matchmnt)
592{
593	struct proc *p = CURPROC;
594 int matchcnt, loopcount;
595 struct timeval starttime;
596
597 /*
598 * First process any items on the delayed-free queue.
599 */
600	ACQUIRE_LOCK(&lk);
601	softdep_freequeue_process();
602	FREE_LOCK(&lk);
603
604 /*
605 * Record the process identifier of our caller so that we can give
606 * this process preferential treatment in request_cleanup below.
607 * We can't do this in softdep_initialize, because the syncer doesn't
608 * have to run then.
609 * NOTE! This function _could_ be called with a curproc != syncerproc.
610 */
611 filesys_syncer = syncerproc;
612 matchcnt = 0;
613
614 /*
615 * There is no danger of having multiple processes run this
616 * code, but we have to single-thread it when softdep_flushfiles()
617 * is in operation to get an accurate count of the number of items
618 * related to its mount point that are in the list.
619 */
620	if (matchmnt == NULL) {
621 if (softdep_worklist_busy < 0)
622 return(-1);
623 softdep_worklist_busy += 1;
624 }
625
626 /*
627 * If requested, try removing inode or removal dependencies.
628 */
629 if (req_clear_inodedeps) {
630 clear_inodedeps(p);
631 req_clear_inodedeps -= 1;
632		wakeup_one(&proc_waiting);
633 }
634 if (req_clear_remove) {
635 clear_remove(p);
636 req_clear_remove -= 1;
637		wakeup_one(&proc_waiting);
638 }
639 loopcount = 1;
Value stored to 'loopcount' is never read
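(Note: the only later reference to 'loopcount' is the 'loopcount++' at line
672 below, which sits inside an '#if 0' block and is compiled out, so the
value stored here can never be read.)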
640 getmicrouptime(&starttime);
641 while (num_on_worklist > 0) {
642		if (process_worklist_item(matchmnt, &matchcnt, LK_NOWAIT) == 0)
643 break;
644
645 /*
646 * If a umount operation wants to run the worklist
647 * accurately, abort.
648 */
649		if (softdep_worklist_req && matchmnt == NULL) {
650 matchcnt = -1;
651 break;
652 }
653
654 /*
655 * If requested, try removing inode or removal dependencies.
656 */
657 if (req_clear_inodedeps) {
658 clear_inodedeps(p);
659 req_clear_inodedeps -= 1;
660			wakeup_one(&proc_waiting);
661 }
662 if (req_clear_remove) {
663 clear_remove(p);
664 req_clear_remove -= 1;
665			wakeup_one(&proc_waiting);
666 }
667 /*
668 * We do not generally want to stop for buffer space, but if
669 * we are really being a buffer hog, we will stop and wait.
670 */
671#if 0
672 if (loopcount++ % 128 == 0)
673 bwillwrite();
674#endif
675 /*
676 * Never allow processing to run for more than one
677 * second. Otherwise the other syncer tasks may get
678 * excessively backlogged.
679 */
680 {
681 struct timeval diff;
682 struct timeval tv;
683
684 getmicrouptime(&tv);
685			timersub(&tv, &starttime, &diff);
686			if (diff.tv_sec != 0 && matchmnt == NULL) {
687 matchcnt = -1;
688 break;
689 }
690 }
691
692 /*
693 * Process any new items on the delayed-free queue.
694 */
695		ACQUIRE_LOCK(&lk);
696		softdep_freequeue_process();
697		FREE_LOCK(&lk);
698 }
699	if (matchmnt == NULL) {
700 softdep_worklist_busy -= 1;
701 if (softdep_worklist_req && softdep_worklist_busy == 0)
702 wakeup(&softdep_worklist_req);
703 }
704 return (matchcnt);
705}
706
707/*
708 * Process one item on the worklist.
709 */
710STATIC int
711process_worklist_item(struct mount *matchmnt, int *matchcnt, int flags)
712{
713 struct worklist *wk, *wkend;
714 struct dirrem *dirrem;
715 struct mount *mp;
716 struct vnode *vp;
717
718	ACQUIRE_LOCK(&lk);
719	/*
720	 * Normally we just process each item on the worklist in order.
721	 * However, if we are in a situation where we cannot lock any
722	 * inodes, we have to skip over any dirrem requests whose
723	 * vnodes are resident and locked.
724	 */
725	LIST_FOREACH(wk, &softdep_workitem_pending, wk_list) {
726		if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
727			break;
728		dirrem = WK_DIRREM(wk);
729		vp = ufs_ihashlookup(VFSTOUFS(dirrem->dm_mnt)->um_dev,
730		    dirrem->dm_oldinum);
731		if (vp == NULL || !VOP_ISLOCKED(vp))
732			break;
733	}
734	if (wk == NULL) {
735		FREE_LOCK(&lk);
736 return (0);
737 }
738 /*
739 * Remove the item to be processed. If we are removing the last
740 * item on the list, we need to recalculate the tail pointer.
741 * As this happens rarely and usually when the list is short,
742 * we just run down the list to find it rather than tracking it
743 * in the above loop.
744 */
745	WORKLIST_REMOVE(wk);
746	if (wk == worklist_tail) {
747		LIST_FOREACH(wkend, &softdep_workitem_pending, wk_list)
748			if (LIST_NEXT(wkend, wk_list) == NULL)
749				break;
750		worklist_tail = wkend;
751	}
752	num_on_worklist -= 1;
753	FREE_LOCK(&lk);
754 switch (wk->wk_type) {
755
756	case D_DIRREM:
757		/* removal of a directory entry */
758		mp = WK_DIRREM(wk)->dm_mnt;
759#if 0
760		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
761			panic("%s: dirrem on suspended filesystem",
762			    "process_worklist_item");
763#endif
764		if (matchmnt != NULL && mp == matchmnt)
765			*matchcnt += 1;
766		handle_workitem_remove(WK_DIRREM(wk));
767		break;
768
769	case D_FREEBLKS:
770		/* releasing blocks and/or fragments from a file */
771		mp = WK_FREEBLKS(wk)->fb_mnt;
772#if 0
773		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
774			panic("%s: freeblks on suspended filesystem",
775			    "process_worklist_item");
776#endif
777		if (matchmnt != NULL && mp == matchmnt)
778			*matchcnt += 1;
779		handle_workitem_freeblocks(WK_FREEBLKS(wk));
780		break;
781
782	case D_FREEFRAG:
783		/* releasing a fragment when replaced as a file grows */
784		mp = WK_FREEFRAG(wk)->ff_mnt;
785#if 0
786		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
787			panic("%s: freefrag on suspended filesystem",
788			    "process_worklist_item");
789#endif
790		if (matchmnt != NULL && mp == matchmnt)
791			*matchcnt += 1;
792		handle_workitem_freefrag(WK_FREEFRAG(wk));
793		break;
794
795	case D_FREEFILE:
796		/* releasing an inode when its link count drops to 0 */
797		mp = WK_FREEFILE(wk)->fx_mnt;
798#if 0
799		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
800			panic("%s: freefile on suspended filesystem",
801			    "process_worklist_item");
802#endif
803		if (matchmnt != NULL && mp == matchmnt)
804			*matchcnt += 1;
805		handle_workitem_freefile(WK_FREEFILE(wk));
806		break;
807
808	default:
809		panic("%s_process_worklist: Unknown type %s",
810		    "softdep", TYPENAME(wk->wk_type));
811 /* NOTREACHED */
812 }
813 return (1);
814}
815
816/*
817 * Move dependencies from one buffer to another.
818 */
819void
820softdep_move_dependencies(struct buf *oldbp, struct buf *newbp)
821{
822 struct worklist *wk, *wktail;
823
824	if (LIST_FIRST(&newbp->b_dep) != NULL)
825		panic("softdep_move_dependencies: need merge code");
826	wktail = NULL;
827	ACQUIRE_LOCK(&lk);
828	while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
829		LIST_REMOVE(wk, wk_list);
830		if (wktail == NULL)
831			LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
832		else
833			LIST_INSERT_AFTER(wktail, wk, wk_list);
834		wktail = wk;
835	}
836	FREE_LOCK(&lk);
837}
838
839/*
840 * Purge the work list of all items associated with a particular mount point.
841 */
842int
843softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p)
844{
845 struct vnode *devvp;
846 int count, error = 0;
847
848 /*
849 * Await our turn to clear out the queue, then serialize access.
850 */
851 while (softdep_worklist_busy) {
852 softdep_worklist_req += 1;
853		tsleep_nsec(&softdep_worklist_req, PRIBIO, "softflush", INFSLP);
854 softdep_worklist_req -= 1;
855 }
856 softdep_worklist_busy = -1;
857 /*
858 * Alternately flush the block device associated with the mount
859 * point and process any dependencies that the flushing
860 * creates. We continue until no more worklist dependencies
861 * are found.
862 */
863 *countp = 0;
864	devvp = VFSTOUFS(oldmnt)->um_devvp;
865 while ((count = softdep_process_worklist(oldmnt)) > 0) {
866 *countp += count;
867		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
868		error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
869 VOP_UNLOCK(devvp);
870 if (error)
871 break;
872 }
873 softdep_worklist_busy = 0;
874 if (softdep_worklist_req)
875 wakeup(&softdep_worklist_req);
876 return (error);
877}
878
879/*
880 * Flush all vnodes and worklist items associated with a specified mount point.
881 */
882int
883softdep_flushfiles(struct mount *oldmnt, int flags, struct proc *p)
884{
885 int error, count, loopcnt;
886
887 /*
888 * Alternately flush the vnodes associated with the mount
889 * point and process any dependencies that the flushing
890 * creates. In theory, this loop can happen at most twice,
891 * but we give it a few extra just to be sure.
892 */
893 for (loopcnt = 10; loopcnt > 0; loopcnt--) {
894 /*
895 * Do another flush in case any vnodes were brought in
896 * as part of the cleanup operations.
897 */
898 if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
899 break;
900 if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
901 count == 0)
902 break;
903 }
904 /*
905 * If the reboot process sleeps during the loop, the update
906 * process may call softdep_process_worklist() and create
907 * new dirty vnodes at the mount point. Call ffs_flushfiles()
908 * again after the loop has flushed all soft dependencies.
909 */
910 if (error == 0)
911 error = ffs_flushfiles(oldmnt, flags, p);
912 /*
913 * If we are unmounting then it is an error to fail. If we
914 * are simply trying to downgrade to read-only, then filesystem
915 * activity can keep us busy forever, so we just fail with EBUSY.
916 */
917 if (loopcnt == 0) {
918		error = EBUSY;
919 }
920 return (error);
921}
922
923/*
924 * Structure hashing.
925 *
926 * There are three types of structures that can be looked up:
927 * 1) pagedep structures identified by mount point, inode number,
928 * and logical block.
929 * 2) inodedep structures identified by mount point and inode number.
930 * 3) newblk structures identified by mount point and
931 * physical block number.
932 *
933 * The "pagedep" and "inodedep" dependency structures are hashed
934 * separately from the file blocks and inodes to which they correspond.
935 * This separation helps when the in-memory copy of an inode or
936 * file block must be replaced. It also obviates the need to access
937 * an inode or file page when simply updating (or de-allocating)
938 * dependency structures. Lookup of newblk structures is needed to
939 * find newly allocated blocks when trying to associate them with
940 * their allocdirect or allocindir structure.
941 *
942 * The lookup routines optionally create and hash a new instance when
943 * an existing entry is not found.
944 */
945#define DEPALLOC	0x0001	/* allocate structure if lookup fails */
946#define NODELAY		0x0002	/* cannot do background work */
947
948SIPHASH_KEY softdep_hashkey;
949
950/*
951 * Structures and routines associated with pagedep caching.
952 */
953LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
954u_long pagedep_hash; /* size of hash table - 1 */
955STATIC struct sema pagedep_in_progress;
956
957/*
958 * Look up a pagedep. Return 1 if found, 0 if not found or found
959 * when asked to allocate but not associated with any buffer.
960 * If not found, allocate if DEPALLOC flag is passed.
961 * Found or allocated entry is returned in pagedeppp.
962 * This routine must be called with splbio interrupts blocked.
963 */
964STATIC int
965pagedep_lookup(struct inode *ip, daddr_t lbn, int flags,
966 struct pagedep **pagedeppp)
967{
968 SIPHASH_CTX ctx;
969 struct pagedep *pagedep;
970 struct pagedep_hashhead *pagedephd;
971 struct mount *mp;
972 int i;
973
974	splassert(IPL_BIO);
975
976#ifdef DEBUG
977 if (lk.lkt_held == -1)
978 panic("pagedep_lookup: lock not held");
979#endif
980	mp = ITOV(ip)->v_mount;
981
982	SipHash24_Init(&ctx, &softdep_hashkey);
983	SipHash24_Update(&ctx, &mp, sizeof(mp));
984	SipHash24_Update(&ctx, &ip->i_number, sizeof(ip->i_number));
985	SipHash24_Update(&ctx, &lbn, sizeof(lbn));
986	pagedephd = &pagedep_hashtbl[SipHash24_End(&ctx) & pagedep_hash];
987top:
988	LIST_FOREACH(pagedep, pagedephd, pd_hash)
989 if (ip->i_number == pagedep->pd_ino &&
990 lbn == pagedep->pd_lbn &&
991 mp == pagedep->pd_mnt)
992 break;
993 if (pagedep) {
994 *pagedeppp = pagedep;
995		if ((flags & DEPALLOC) != 0 &&
996		    (pagedep->pd_state & ONWORKLIST) == 0)
997 return (0);
998 return (1);
999 }
1000	if ((flags & DEPALLOC) == 0) {
1001		*pagedeppp = NULL;
1002 return (0);
1003 }
1004 if (sema_get(&pagedep_in_progress, &lk) == 0) {
1005		ACQUIRE_LOCK(&lk);
1006 goto top;
1007 }
1008	pagedep = pool_get(&pagedep_pool, PR_WAITOK | PR_ZERO);
1009	pagedep->pd_list.wk_type = D_PAGEDEP;
1010 pagedep->pd_mnt = mp;
1011 pagedep->pd_ino = ip->i_number;
1012 pagedep->pd_lbn = lbn;
1013	LIST_INIT(&pagedep->pd_dirremhd);
1014	LIST_INIT(&pagedep->pd_pendinghd);
1015	for (i = 0; i < DAHASHSZ; i++)
1016		LIST_INIT(&pagedep->pd_diraddhd[i]);
1017	ACQUIRE_LOCK(&lk);
1018	LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
1019 sema_release(&pagedep_in_progress);
1020 *pagedeppp = pagedep;
1021 return (0);
1022}
1023
1024/*
1025 * Structures and routines associated with inodedep caching.
1026 */
1027LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
1028STATIC u_long inodedep_hash; /* size of hash table - 1 */
1029STATIC long num_inodedep; /* number of inodedep allocated */
1030STATIC struct sema inodedep_in_progress;
1031
1032/*
1033 * Look up a inodedep. Return 1 if found, 0 if not found.
1034 * If not found, allocate if DEPALLOC flag is passed.
1035 * Found or allocated entry is returned in inodedeppp.
1036 * This routine must be called with splbio interrupts blocked.
1037 */
1038STATIC int
1039inodedep_lookup(struct fs *fs, ufsino_t inum, int flags,
1040 struct inodedep **inodedeppp)
1041{
1042 SIPHASH_CTX ctx;
1043 struct inodedep *inodedep;
1044 struct inodedep_hashhead *inodedephd;
1045 int firsttry;
1046
1047	splassert(IPL_BIO);
1048
1049#ifdef DEBUG
1050 if (lk.lkt_held == -1)
1051 panic("inodedep_lookup: lock not held");
1052#endif
1053 firsttry = 1;
1054	SipHash24_Init(&ctx, &softdep_hashkey);
1055	SipHash24_Update(&ctx, &fs, sizeof(fs));
1056	SipHash24_Update(&ctx, &inum, sizeof(inum));
1057	inodedephd = &inodedep_hashtbl[SipHash24_End(&ctx) & inodedep_hash];
1058top:
1059	LIST_FOREACH(inodedep, inodedephd, id_hash)
1060 if (inum == inodedep->id_ino && fs == inodedep->id_fs)
1061 break;
1062 if (inodedep) {
1063 *inodedeppp = inodedep;
1064 return (1);
1065 }
1066	if ((flags & DEPALLOC) == 0) {
1067		*inodedeppp = NULL;
1068 return (0);
1069 }
1070 /*
1071 * If we are over our limit, try to improve the situation.
1072 */
1073	if (num_inodedep > max_softdeps && firsttry && (flags & NODELAY) == 0 &&
1074	    request_cleanup(FLUSH_INODES, 1)) {
1075 firsttry = 0;
1076 goto top;
1077 }
1078 if (sema_get(&inodedep_in_progress, &lk) == 0) {
1079		ACQUIRE_LOCK(&lk);
1080 goto top;
1081 }
1082 num_inodedep += 1;
1083	inodedep = pool_get(&inodedep_pool, PR_WAITOK);
1084	inodedep->id_list.wk_type = D_INODEDEP;
1085	inodedep->id_fs = fs;
1086	inodedep->id_ino = inum;
1087	inodedep->id_state = ALLCOMPLETE;
1088	inodedep->id_nlinkdelta = 0;
1089	inodedep->id_savedino1 = NULL;
1090	inodedep->id_savedsize = -1;
1091	inodedep->id_buf = NULL;
1092	LIST_INIT(&inodedep->id_pendinghd);
1093	LIST_INIT(&inodedep->id_inowait);
1094	LIST_INIT(&inodedep->id_bufwait);
1095	TAILQ_INIT(&inodedep->id_inoupdt);
1096	TAILQ_INIT(&inodedep->id_newinoupdt);
1097	ACQUIRE_LOCK(&lk);
1098	LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
1099 sema_release(&inodedep_in_progress);
1100 *inodedeppp = inodedep;
1101 return (0);
1102}
1103
1104/*
1105 * Structures and routines associated with newblk caching.
1106 */
1107LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
1108u_long newblk_hash; /* size of hash table - 1 */
1109STATIC struct sema newblk_in_progress;
1110
1111/*
1112 * Look up a newblk. Return 1 if found, 0 if not found.
1113 * If not found, allocate if DEPALLOC flag is passed.
1114 * Found or allocated entry is returned in newblkpp.
1115 */
1116STATIC int
1117newblk_lookup(struct fs *fs, daddr_t newblkno, int flags,
1118 struct newblk **newblkpp)
1119{
1120 SIPHASH_CTX ctx;
1121 struct newblk *newblk;
1122 struct newblk_hashhead *newblkhd;
1123
1124	SipHash24_Init(&ctx, &softdep_hashkey);
1125	SipHash24_Update(&ctx, &fs, sizeof(fs));
1126	SipHash24_Update(&ctx, &newblkno, sizeof(newblkno));
1127	newblkhd = &newblk_hashtbl[SipHash24_End(&ctx) & newblk_hash];
1128top:
1129	LIST_FOREACH(newblk, newblkhd, nb_hash)
1130 if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs)
1131 break;
1132 if (newblk) {
1133 *newblkpp = newblk;
1134 return (1);
1135 }
1136	if ((flags & DEPALLOC) == 0) {
1137		*newblkpp = NULL;
1138 return (0);
1139 }
1140	if (sema_get(&newblk_in_progress, NULL) == 0)
1141 goto top;
1142	newblk = pool_get(&newblk_pool, PR_WAITOK);
1143 newblk->nb_state = 0;
1144 newblk->nb_fs = fs;
1145 newblk->nb_newblkno = newblkno;
1146	LIST_INSERT_HEAD(newblkhd, newblk, nb_hash);
1147 sema_release(&newblk_in_progress);
1148 *newblkpp = newblk;
1149 return (0);
1150}
1151
1152/*
1153 * Executed during filesystem system initialization before
1154 * mounting any file systems.
1155 */
1156void
1157softdep_initialize(void)
1158{
1159
1160 bioops.io_start = softdep_disk_io_initiation;
1161 bioops.io_complete = softdep_disk_write_complete;
1162 bioops.io_deallocate = softdep_deallocate_dependencies;
1163 bioops.io_movedeps = softdep_move_dependencies;
1164 bioops.io_countdeps = softdep_count_dependencies;
1165
1166	LIST_INIT(&mkdirlisthd);
1167	LIST_INIT(&softdep_workitem_pending);
1168#ifdef KMEMSTATS
1169	max_softdeps = min (initialvnodes * 8,
1170	    kmemstats[M_INODEDEP].ks_limit / (2 * sizeof(struct inodedep)));
1171#else
1172	max_softdeps = initialvnodes * 4;
1173#endif
1174	arc4random_buf(&softdep_hashkey, sizeof(softdep_hashkey));
1175	pagedep_hashtbl = hashinit(initialvnodes / 5, M_PAGEDEP, M_WAITOK,
1176	    &pagedep_hash);
1177	sema_init(&pagedep_in_progress, "pagedep", PRIBIO);
1178	inodedep_hashtbl = hashinit(initialvnodes, M_INODEDEP, M_WAITOK,
1179	    &inodedep_hash);
1180	sema_init(&inodedep_in_progress, "inodedep", PRIBIO);
1181	newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
1182	sema_init(&newblk_in_progress, "newblk", PRIBIO);
1183	timeout_set(&proc_waiting_timeout, pause_timer, NULL);
1184	pool_init(&pagedep_pool, sizeof(struct pagedep), 0, IPL_NONE,
1185	    PR_WAITOK, "pagedep", NULL);
1186	pool_init(&inodedep_pool, sizeof(struct inodedep), 0, IPL_NONE,
1187	    PR_WAITOK, "inodedep", NULL);
1188	pool_init(&newblk_pool, sizeof(struct newblk), 0, IPL_NONE,
1189	    PR_WAITOK, "newblk", NULL);
1190	pool_init(&bmsafemap_pool, sizeof(struct bmsafemap), 0, IPL_NONE,
1191	    PR_WAITOK, "bmsafemap", NULL);
1192	pool_init(&allocdirect_pool, sizeof(struct allocdirect), 0, IPL_NONE,
1193	    PR_WAITOK, "allocdir", NULL);
1194	pool_init(&indirdep_pool, sizeof(struct indirdep), 0, IPL_NONE,
1195	    PR_WAITOK, "indirdep", NULL);
1196	pool_init(&allocindir_pool, sizeof(struct allocindir), 0, IPL_NONE,
1197	    PR_WAITOK, "allocindir", NULL);
1198	pool_init(&freefrag_pool, sizeof(struct freefrag), 0, IPL_NONE,
1199	    PR_WAITOK, "freefrag", NULL);
1200	pool_init(&freeblks_pool, sizeof(struct freeblks), 0, IPL_NONE,
1201	    PR_WAITOK, "freeblks", NULL);
1202	pool_init(&freefile_pool, sizeof(struct freefile), 0, IPL_NONE,
1203	    PR_WAITOK, "freefile", NULL);
1204	pool_init(&diradd_pool, sizeof(struct diradd), 0, IPL_NONE,
1205	    PR_WAITOK, "diradd", NULL);
1206	pool_init(&mkdir_pool, sizeof(struct mkdir), 0, IPL_NONE,
1207	    PR_WAITOK, "mkdir", NULL);
1208	pool_init(&dirrem_pool, sizeof(struct dirrem), 0, IPL_NONE,
1209	    PR_WAITOK, "dirrem", NULL);
1210	pool_init(&newdirblk_pool, sizeof(struct newdirblk), 0, IPL_NONE,
1211	    PR_WAITOK, "newdirblk", NULL);
1212}
1213
1214/*
1215 * Called at mount time to notify the dependency code that a
1216 * filesystem wishes to use it.
1217 */
1218int
1219softdep_mount(struct vnode *devvp, struct mount *mp, struct fs *fs,
1220 struct ucred *cred)
1221{
1222 struct csum_total cstotal;
1223 struct cg *cgp;
1224 struct buf *bp;
1225 int error, cyl;
1226
1227 /*
1228 * When doing soft updates, the counters in the
1229 * superblock may have gotten out of sync, so we have
1230 * to scan the cylinder groups and recalculate them.
1231 */
1232	if ((fs->fs_flags & FS_UNCLEAN) == 0)
1233 return (0);
1234	memset(&cstotal, 0, sizeof(cstotal));
1235 for (cyl = 0; cyl < fs->fs_ncg; cyl++) {
1236		if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
1237		    fs->fs_cgsize, &bp)) != 0) {
1238 brelse(bp);
1239 return (error);
1240 }
1241 cgp = (struct cg *)bp->b_data;
1242 cstotal.cs_nffree += cgp->cg_cs.cs_nffree;
1243 cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree;
1244 cstotal.cs_nifree += cgp->cg_cs.cs_nifree;
1245 cstotal.cs_ndir += cgp->cg_cs.cs_ndir;
1246		fs->fs_cs(fs, cyl) = cgp->cg_cs;
1247 brelse(bp);
1248 }
1249#ifdef DEBUG
1250	if (memcmp(&cstotal, &fs->fs_cstotal, sizeof(cstotal)))
1251 printf("ffs_mountfs: superblock updated for soft updates\n");
1252#endif
1253	memcpy(&fs->fs_cstotal, &cstotal, sizeof(cstotal));
1254 return (0);
1255}
1256
1257/*
1258 * Protecting the freemaps (or bitmaps).
1259 *
1260 * To eliminate the need to execute fsck before mounting a file system
1261 * after a power failure, one must (conservatively) guarantee that the
1262 * on-disk copy of the bitmaps never indicate that a live inode or block is
1263 * free. So, when a block or inode is allocated, the bitmap should be
1264 * updated (on disk) before any new pointers. When a block or inode is
1265 * freed, the bitmap should not be updated until all pointers have been
1266 * reset. The latter dependency is handled by the delayed de-allocation
1267 * approach described below for block and inode de-allocation. The former
1268 * dependency is handled by calling the following procedure when a block or
1269 * inode is allocated. When an inode is allocated an "inodedep" is created
1270 * with its DEPCOMPLETE flag cleared until its bitmap is written to disk.
1271 * Each "inodedep" is also inserted into the hash indexing structure so
1272 * that any additional link additions can be made dependent on the inode
1273 * allocation.
1274 *
1275 * The ufs file system maintains a number of free block counts (e.g., per
1276 * cylinder group, per cylinder and per <cylinder, rotational position> pair)
1277 * in addition to the bitmaps. These counts are used to improve efficiency
1278 * during allocation and therefore must be consistent with the bitmaps.
1279 * There is no convenient way to guarantee post-crash consistency of these
1280 * counts with simple update ordering, for two main reasons: (1) The counts
1281 * and bitmaps for a single cylinder group block are not in the same disk
1282 * sector. If a disk write is interrupted (e.g., by power failure), one may
1283 * be written and the other not. (2) Some of the counts are located in the
1284 * superblock rather than the cylinder group block. So, we focus our soft
1285 * updates implementation on protecting the bitmaps. When mounting a
1286 * filesystem, we recompute the auxiliary counts from the bitmaps.
1287 */
1288
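The ordering rule above can be made concrete with a minimal userland sketch
(not code from this file): a hypothetical one-bit "bitmap" and one "pointer"
stand in for the on-disk structures, and an assertion checks that no step of
either ordering ever lets a referenced block appear free on disk.

#include <assert.h>

static int disk_bitmap;		/* 1 = block marked allocated on disk */
static int disk_pointer;	/* nonzero = on-disk inode references it */

/* the invariant: a referenced block must be marked allocated */
static void
check(void)
{
	assert(disk_pointer == 0 || disk_bitmap == 1);
}

int
main(void)
{
	/* allocate: the bitmap write is ordered before the pointer write */
	disk_bitmap = 1;  check();
	disk_pointer = 123;  check();

	/* free: the pointer reset is ordered before the bitmap write */
	disk_pointer = 0;  check();
	disk_bitmap = 0;  check();
	return 0;
}
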
1289/*
1290 * Called just after updating the cylinder group block to allocate an inode.
1291 */
1292/* buffer for cylgroup block with inode map */
1293/* inode related to allocation */
1294/* new inode number being allocated */
1295void
1296softdep_setup_inomapdep(struct buf *bp, struct inode *ip, ufsino_t newinum)
1297{
1298 struct inodedep *inodedep;
1299 struct bmsafemap *bmsafemap;
1300
1301 /*
1302 * Create a dependency for the newly allocated inode.
1303 * Panic if it already exists as something is seriously wrong.
1304 * Otherwise add it to the dependency list for the buffer holding
1305 * the cylinder group map from which it was allocated.
1306 */
1307	ACQUIRE_LOCK(&lk);
1308	if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC | NODELAY, &inodedep)
1309	    != 0) {
1310		FREE_LOCK(&lk);
1311		panic("softdep_setup_inomapdep: found inode");
1312	}
1313	inodedep->id_buf = bp;
1314	inodedep->id_state &= ~DEPCOMPLETE;
1315	bmsafemap = bmsafemap_lookup(bp);
1316	LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
1317	FREE_LOCK(&lk);
1318}
1319
1320/*
1321 * Called just after updating the cylinder group block to
1322 * allocate block or fragment.
1323 */
1324/* buffer for cylgroup block with block map */
1325/* filesystem doing allocation */
1326/* number of newly allocated block */
1327void
1328softdep_setup_blkmapdep(struct buf *bp, struct fs *fs, daddr_t newblkno)
1329{
1330 struct newblk *newblk;
1331 struct bmsafemap *bmsafemap;
1332
1333 /*
1334 * Create a dependency for the newly allocated block.
1335 * Add it to the dependency list for the buffer holding
1336 * the cylinder group map from which it was allocated.
1337 */
1338	if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0)
1339		panic("softdep_setup_blkmapdep: found block");
1340	ACQUIRE_LOCK(&lk);
1341	newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp);
1342	LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
1343	FREE_LOCK(&lk);
1344}
1345
1346/*
1347 * Find the bmsafemap associated with a cylinder group buffer.
1348 * If none exists, create one. The buffer must be locked when
1349 * this routine is called and this routine must be called with
1350 * splbio interrupts blocked.
1351 */
1352STATIC struct bmsafemap *
1353bmsafemap_lookup(struct buf *bp)
1354{
1355 struct bmsafemap *bmsafemap;
1356 struct worklist *wk;
1357
1358	splassert(IPL_BIO);
1359
1360#ifdef DEBUG
1361	if (lk.lkt_held == -1)
1362		panic("bmsafemap_lookup: lock not held");
1363#endif
1364	LIST_FOREACH(wk, &bp->b_dep, wk_list)
1365		if (wk->wk_type == D_BMSAFEMAP)
1366			return (WK_BMSAFEMAP(wk));
1367	FREE_LOCK(&lk);
1368	bmsafemap = pool_get(&bmsafemap_pool, PR_WAITOK);
1369	bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
1370	bmsafemap->sm_list.wk_state = 0;
1371	bmsafemap->sm_buf = bp;
1372	LIST_INIT(&bmsafemap->sm_allocdirecthd);
1373	LIST_INIT(&bmsafemap->sm_allocindirhd);
1374	LIST_INIT(&bmsafemap->sm_inodedephd);
1375	LIST_INIT(&bmsafemap->sm_newblkhd);
1376	ACQUIRE_LOCK(&lk);
1377	WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
1378 return (bmsafemap);
1379}
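
bmsafemap_lookup() follows a recurring softdep idiom: walk the buffer's
dependency list for an existing item of the wanted type and, failing that,
allocate a new one and attach it. A self-contained sketch of the idiom using
<sys/queue.h>, with illustrative types rather than the kernel's (the kernel
version also drops the interrupt lock around the sleeping pool allocation,
which this sketch omits):

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	int type;
	LIST_ENTRY(item) entries;
};
LIST_HEAD(itemhead, item);

/* find an item of the given type, creating and inserting it if absent */
static struct item *
lookup_or_create(struct itemhead *head, int type)
{
	struct item *it;

	LIST_FOREACH(it, head, entries)
		if (it->type == type)
			return (it);
	it = calloc(1, sizeof(*it));
	if (it == NULL)
		abort();
	it->type = type;
	LIST_INSERT_HEAD(head, it, entries);
	return (it);
}

int
main(void)
{
	struct itemhead head = LIST_HEAD_INITIALIZER(head);
	struct item *a = lookup_or_create(&head, 3);
	struct item *b = lookup_or_create(&head, 3);

	printf("%s\n", a == b ? "reused" : "bug");	/* prints "reused" */
	return 0;
}
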
1380
1381/*
1382 * Direct block allocation dependencies.
1383 *
1384 * When a new block is allocated, the corresponding disk locations must be
1385 * initialized (with zeros or new data) before the on-disk inode points to
1386 * them. Also, the freemap from which the block was allocated must be
1387 * updated (on disk) before the inode's pointer. These two dependencies are
1388 * independent of each other and are needed for all file blocks and indirect
1389 * blocks that are pointed to directly by the inode. Just before the
1390 * "in-core" version of the inode is updated with a newly allocated block
1391 * number, a procedure (below) is called to setup allocation dependency
1392 * structures. These structures are removed when the corresponding
1393 * dependencies are satisfied or when the block allocation becomes obsolete
1394 * (i.e., the file is deleted, the block is de-allocated, or the block is a
1395 * fragment that gets upgraded). All of these cases are handled in
1396 * procedures described later.
1397 *
1398 * When a file extension causes a fragment to be upgraded, either to a larger
1399 * fragment or to a full block, the on-disk location may change (if the
1400 * previous fragment could not simply be extended). In this case, the old
1401 * fragment must be de-allocated, but not until after the inode's pointer has
1402 * been updated. In most cases, this is handled by later procedures, which
1403 * will construct a "freefrag" structure to be added to the workitem queue
1404 * when the inode update is complete (or obsolete). The main exception to
1405 * this is when an allocation occurs while a pending allocation dependency
1406 * (for the same block pointer) remains. This case is handled in the main
1407 * allocation dependency setup procedure by immediately freeing the
1408 * unreferenced fragments.
1409 */
1410/* inode to which block is being added */
1411/* block pointer within inode */
1412/* disk block number being added */
1413/* previous block number, 0 unless frag */
1414/* size of new block */
1415 /* size of old block */
1416/* bp for allocated block */
1417void
1418softdep_setup_allocdirect(struct inode *ip, daddr_t lbn, daddr_t newblkno,
1419 daddr_t oldblkno, long newsize, long oldsize, struct buf *bp)
1420{
1421 struct allocdirect *adp, *oldadp;
1422 struct allocdirectlst *adphead;
1423 struct bmsafemap *bmsafemap;
1424 struct inodedep *inodedep;
1425 struct pagedep *pagedep;
1426 struct newblk *newblk;
1427
1428	adp = pool_get(&allocdirect_pool, PR_WAITOK | PR_ZERO);
1429	adp->ad_list.wk_type = D_ALLOCDIRECT;
1430	adp->ad_lbn = lbn;
1431	adp->ad_newblkno = newblkno;
1432	adp->ad_oldblkno = oldblkno;
1433	adp->ad_newsize = newsize;
1434	adp->ad_oldsize = oldsize;
1435	adp->ad_state = ATTACHED;
1436	LIST_INIT(&adp->ad_newdirblk);
1437	if (newblkno == oldblkno)
1438		adp->ad_freefrag = NULL;
1439	else
1440		adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
1441
1442	if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
1443		panic("softdep_setup_allocdirect: lost block");
1444
1445	ACQUIRE_LOCK(&lk);
1446	inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
1447 adp->ad_inodedep = inodedep;
1448
1449	if (newblk->nb_state == DEPCOMPLETE) {
1450		adp->ad_state |= DEPCOMPLETE;
1451		adp->ad_buf = NULL;
1452	} else {
1453		bmsafemap = newblk->nb_bmsafemap;
1454		adp->ad_buf = bmsafemap->sm_buf;
1455		LIST_REMOVE(newblk, nb_deps);
1456		LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
1457	}
1458	LIST_REMOVE(newblk, nb_hash);
1459	pool_put(&newblk_pool, newblk);
1460
1461	if (bp == NULL) {
1462 /*
1463 * XXXUBC - Yes, I know how to fix this, but not right now.
1464 */
1465 panic("softdep_setup_allocdirect: Bonk art in the head");
1466 }
1467	WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
1468	if (lbn >= NDADDR) {
1469 /* allocating an indirect block */
1470 if (oldblkno != 0) {
1471			FREE_LOCK(&lk);
1472 panic("softdep_setup_allocdirect: non-zero indir");
1473 }
1474 } else {
1475 /*
1476 * Allocating a direct block.
1477 *
1478 * If we are allocating a directory block, then we must
1479 * allocate an associated pagedep to track additions and
1480 * deletions.
1481 */
1482		if ((DIP(ip, mode) & IFMT) == IFDIR &&
1483		    pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
1484			WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
1485 }
1486 /*
1487 * The list of allocdirects must be kept in sorted and ascending
1488 * order so that the rollback routines can quickly determine the
1489 * first uncommitted block (the size of the file stored on disk
1490 * ends at the end of the lowest committed fragment, or if there
1491 * are no fragments, at the end of the highest committed block).
1492 * Since files generally grow, the typical case is that the new
1493 * block is to be added at the end of the list. We speed this
1494 * special case by checking against the last allocdirect in the
1495 * list before laboriously traversing the list looking for the
1496 * insertion point.
1497 */
1498 adphead = &inodedep->id_newinoupdt;
1499	oldadp = TAILQ_LAST(adphead, allocdirectlst);
1500	if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
1501 /* insert at end of list */
1502		TAILQ_INSERT_TAIL(adphead, adp, ad_next);
1503		if (oldadp != NULL && oldadp->ad_lbn == lbn)
1504			allocdirect_merge(adphead, adp, oldadp);
1505		FREE_LOCK(&lk);
1506 return;
1507 }
1508	TAILQ_FOREACH(oldadp, adphead, ad_next) {
1509 if (oldadp->ad_lbn >= lbn)
1510 break;
1511 }
1512	if (oldadp == NULL) {
1513		FREE_LOCK(&lk);
1514 panic("softdep_setup_allocdirect: lost entry");
1515 }
1516 /* insert in middle of list */
1517	TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
1518	if (oldadp->ad_lbn == lbn)
1519		allocdirect_merge(adphead, adp, oldadp);
1520	FREE_LOCK(&lk);
1521}
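
The tail-first insertion described in the comment above is easy to isolate.
A userland sketch with hypothetical types, checking the last element before
falling back to a linear scan for the insertion point:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct ad {
	long lbn;
	TAILQ_ENTRY(ad) next;
};
TAILQ_HEAD(adlist, ad);

/* keep the list sorted by lbn, trying the tail before walking */
static void
insert_sorted(struct adlist *head, struct ad *ad)
{
	struct ad *old = TAILQ_LAST(head, adlist);

	if (old == NULL || old->lbn <= ad->lbn) {
		TAILQ_INSERT_TAIL(head, ad, next);	/* common case */
		return;
	}
	TAILQ_FOREACH(old, head, next)
		if (old->lbn >= ad->lbn)
			break;
	TAILQ_INSERT_BEFORE(old, ad, next);
}

int
main(void)
{
	struct adlist head = TAILQ_HEAD_INITIALIZER(head);
	long lbns[] = { 1, 4, 2, 9, 3 };
	struct ad *ad;
	int i;

	for (i = 0; i < 5; i++) {
		ad = calloc(1, sizeof(*ad));
		if (ad == NULL)
			abort();
		ad->lbn = lbns[i];
		insert_sorted(&head, ad);
	}
	TAILQ_FOREACH(ad, &head, next)
		printf("%ld ", ad->lbn);	/* prints: 1 2 3 4 9 */
	printf("\n");
	return 0;
}
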
1522
1523/*
1524 * Replace an old allocdirect dependency with a newer one.
1525 * This routine must be called with splbio interrupts blocked.
1526 */
1527/* head of list holding allocdirects */
1528/* allocdirect being added */
1529/* existing allocdirect being checked */
1530STATIC void
1531allocdirect_merge(struct allocdirectlst *adphead, struct allocdirect *newadp,
1532 struct allocdirect *oldadp)
1533{
1534 struct worklist *wk;
1535 struct freefrag *freefrag;
1536 struct newdirblk *newdirblk;
1537
1538	splassert(IPL_BIO);
1539
1540#ifdef DEBUG
1541 if (lk.lkt_held == -1)
1542 panic("allocdirect_merge: lock not held");
1543#endif
1544 if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
1545 newadp->ad_oldsize != oldadp->ad_newsize ||
1546	    newadp->ad_lbn >= NDADDR) {
1547		FREE_LOCK(&lk);
1548		panic("allocdirect_merge: old %lld != new %lld || lbn %lld >= "
1549		    "%d", (long long)newadp->ad_oldblkno,
1550		    (long long)oldadp->ad_newblkno, (long long)newadp->ad_lbn,
1551		    NDADDR);
1552 }
1553 newadp->ad_oldblkno = oldadp->ad_oldblkno;
1554 newadp->ad_oldsize = oldadp->ad_oldsize;
1555 /*
1556 * If the old dependency had a fragment to free or had never
1557 * previously had a block allocated, then the new dependency
1558 * can immediately post its freefrag and adopt the old freefrag.
1559 * This action is done by swapping the freefrag dependencies.
1560 * The new dependency gains the old one's freefrag, and the
1561 * old one gets the new one and then immediately puts it on
1562 * the worklist when it is freed by free_allocdirect. It is
1563 * not possible to do this swap when the old dependency had a
1564 * non-zero size but no previous fragment to free. This condition
1565 * arises when the new block is an extension of the old block.
1566 * Here, the first part of the fragment allocated to the new
1567 * dependency is part of the block currently claimed on disk by
1568 * the old dependency, so cannot legitimately be freed until the
1569 * conditions for the new dependency are fulfilled.
1570 */
1571	if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
1572 freefrag = newadp->ad_freefrag;
1573 newadp->ad_freefrag = oldadp->ad_freefrag;
1574 oldadp->ad_freefrag = freefrag;
1575 }
1576 /*
1577 * If we are tracking a new directory-block allocation,
1578 * move it from the old allocdirect to the new allocdirect.
1579 */
1580	if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
1581		newdirblk = WK_NEWDIRBLK(wk);
1582		WORKLIST_REMOVE(&newdirblk->db_list);
1583		if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL)
1584			panic("allocdirect_merge: extra newdirblk");
1585		WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list);
1586 }
1587 free_allocdirect(adphead, oldadp, 0);
1588}
1589
1590/*
1591 * Allocate a new freefrag structure if needed.
1592 */
1593STATIC struct freefrag *
1594newfreefrag(struct inode *ip, daddr_t blkno, long size)
1595{
1596 struct freefrag *freefrag;
1597 struct fs *fs;
1598
1599 if (blkno == 0)
1600		return (NULL);
1601	fs = ip->i_fs;
1602	if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
1603		panic("newfreefrag: frag size");
1604	freefrag = pool_get(&freefrag_pool, PR_WAITOK);
1605	freefrag->ff_list.wk_type = D_FREEFRAG;
1606	freefrag->ff_state = DIP(ip, uid) & ~ONWORKLIST;	/* used below */
1607	freefrag->ff_inum = ip->i_number;
1608	freefrag->ff_mnt = ITOV(ip)->v_mount;
1609	freefrag->ff_devvp = ip->i_devvp;
1610 freefrag->ff_blkno = blkno;
1611 freefrag->ff_fragsize = size;
1612 return (freefrag);
1613}
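
The sanity check in newfreefrag() verifies that a run of fragments does not
cross a block boundary: fragnum() yields the fragment's offset within its
block and numfrags() converts a byte size to a fragment count. A standalone
sketch of that arithmetic with a made-up geometry of eight 1 KB fragments per
block (the real values come from the superblock):

#include <assert.h>
#include <stdio.h>

#define FS_FRAG		8	/* fragments per block (power of two) */
#define FS_FSHIFT	10	/* log2 of the fragment size */

#define fragnum(b)	((b) & (FS_FRAG - 1))	/* frag offset in block */
#define numfrags(sz)	((sz) >> FS_FSHIFT)	/* byte size in frags */

int
main(void)
{
	long blkno = 13;		/* fragment 5 within its block */
	long size = 2 << FS_FSHIFT;	/* two fragments worth of bytes */

	/* the run of fragments must fit inside a single block */
	assert(fragnum(blkno) + numfrags(size) <= FS_FRAG);
	printf("frags %ld..%ld fit in one block\n",
	    fragnum(blkno), fragnum(blkno) + numfrags(size) - 1);
	return 0;
}
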
1614
1615/*
1616 * This workitem de-allocates fragments that were replaced during
1617 * file block allocation.
1618 */
1619STATIC void
1620handle_workitem_freefrag(struct freefrag *freefrag)
1621{
1622 struct inode tip;
1623 struct ufs1_dinode dtip1;
1624
1625	tip.i_vnode = NULL;
1626	tip.i_din1 = &dtip1;
1627	tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
1628	tip.i_ump = VFSTOUFS(freefrag->ff_mnt);
1629	tip.i_dev = freefrag->ff_devvp->v_rdev;
1630	tip.i_number = freefrag->ff_inum;
1631	tip.i_ffs1_uid = freefrag->ff_state & ~ONWORKLIST;	/* set above */
1632 ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize);
1633 pool_put(&freefrag_pool, freefrag);
1634}
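
Note the trick shared by newfreefrag() and this handler: the owner's uid is
stashed directly in the work item's state word with the ONWORKLIST bit masked
off, and recovered the same way when the item is processed. A minimal sketch
of that encoding; like the code above, it assumes the uid fits below the flag
bit:

#include <assert.h>

#define ONWORKLIST	0x8000	/* flag bit the worklist code may set */

int
main(void)
{
	unsigned uid = 1000;	/* fits below the flag bit */
	unsigned state;

	/* stash the uid in the state word, keeping the flag bit clear */
	state = uid & ~ONWORKLIST;
	/* the worklist code may later set its flag in the same word */
	state |= ONWORKLIST;
	/* recover the uid by masking the flag back out */
	assert((state & ~ONWORKLIST) == uid);
	return 0;
}
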
1635
1636/*
1637 * Indirect block allocation dependencies.
1638 *
1639 * The same dependencies that exist for a direct block also exist when
1640 * a new block is allocated and pointed to by an entry in a block of
1641 * indirect pointers. The undo/redo states described above are also
1642 * used here. Because an indirect block contains many pointers that
1643 * may have dependencies, a second copy of the entire in-memory indirect
1644 * block is kept. The buffer cache copy is always completely up-to-date.
1645 * The second copy, which is used only as a source for disk writes,
1646 * contains only the safe pointers (i.e., those that have no remaining
1647 * update dependencies). The second copy is freed when all pointers
1648 * are safe. The cache is not allowed to replace indirect blocks with
1649 * pending update dependencies. If a buffer containing an indirect
1650 * block with dependencies is written, these routines will mark it
1651 * dirty again. It can only be successfully written once all the
1652 * dependencies are removed. The ffs_fsync routine in conjunction with
1653 * softdep_sync_metadata work together to get all the dependencies
1654 * removed so that a file can be successfully written to disk. Three
1655 * procedures are used when setting up indirect block pointer
1656 * dependencies. The division is necessary because of the organization
1657 * of the "balloc" routine and because of the distinction between file
1658 * pages and file metadata blocks.
1659 */
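
The two-copy scheme can be sketched in a few lines of standalone C: the
in-core array takes new pointers immediately, while the second copy, the only
one ever written to disk, keeps the old value until the dependency is
satisfied. The array and values below are illustrative only:

#include <stdio.h>
#include <string.h>

#define NPTR 4

int
main(void)
{
	long incore[NPTR] = { 11, 22, 33, 44 };	/* buffer cache copy */
	long safe[NPTR];

	memcpy(safe, incore, sizeof(safe));	/* copy used for disk writes */

	/* a new, not-yet-safe pointer goes only into the in-core copy;
	   the safe copy keeps the old value until the dependency clears */
	incore[2] = 99;
	printf("disk write sees %ld, cache sees %ld\n", safe[2], incore[2]);

	/* once the new block is initialized on disk, expose the pointer */
	safe[2] = incore[2];
	return 0;
}
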
1660
1661/*
1662 * Allocate a new allocindir structure.
1663 */
1664/* inode for file being extended */
1665/* offset of pointer in indirect block */
1666/* disk block number being added */
1667/* previous block number, 0 if none */
1668STATIC struct allocindir *
1669newallocindir(struct inode *ip, int ptrno, daddr_t newblkno,
1670 daddr_t oldblkno)
1671{
1672 struct allocindir *aip;
1673
1674	aip = pool_get(&allocindir_pool, PR_WAITOK | PR_ZERO);
1675	aip->ai_list.wk_type = D_ALLOCINDIR;
1676	aip->ai_state = ATTACHED;
1677	aip->ai_offset = ptrno;
1678	aip->ai_newblkno = newblkno;
1679	aip->ai_oldblkno = oldblkno;
1680	aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize);
1681 return (aip);
1682}
1683
1684/*
1685 * Called just before setting an indirect block pointer
1686 * to a newly allocated file page.
1687 */
1688/* inode for file being extended */
1689/* allocated block number within file */
1690/* buffer with indirect blk referencing page */
1691/* offset of pointer in indirect block */
1692/* disk block number being added */
1693/* previous block number, 0 if none */
1694/* buffer holding allocated page */
1695void
1696softdep_setup_allocindir_page(struct inode *ip, daddr_t lbn, struct buf *bp,
1697 int ptrno, daddr_t newblkno, daddr_t oldblkno, struct buf *nbp)
1698{
1699 struct allocindir *aip;
1700 struct pagedep *pagedep;
1701
1702 aip = newallocindir(ip, ptrno, newblkno, oldblkno);
1703	ACQUIRE_LOCK(&lk);
1704 /*
1705 * If we are allocating a directory page, then we must
1706 * allocate an associated pagedep to track additions and
1707 * deletions.
1708 */
1709	if ((DIP(ip, mode) & IFMT) == IFDIR &&
1710	    pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
1711		WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
1712	if (nbp == NULL) {
1713 /*
1714 * XXXUBC - Yes, I know how to fix this, but not right now.
1715 */
1716 panic("softdep_setup_allocindir_page: Bonk art in the head");
1717 }
1718	WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
1719	FREE_LOCK(&lk);
1720 setup_allocindir_phase2(bp, ip, aip);
1721}
1722
1723/*
1724 * Called just before setting an indirect block pointer to a
1725 * newly allocated indirect block.
1726 */
1727/* newly allocated indirect block */
1728/* inode for file being extended */
1729/* indirect block referencing allocated block */
1730/* offset of pointer in indirect block */
1731/* disk block number being added */
1732void
1733softdep_setup_allocindir_meta(struct buf *nbp, struct inode *ip,
1734 struct buf *bp, int ptrno, daddr_t newblkno)
1735{
1736 struct allocindir *aip;
1737
1738 aip = newallocindir(ip, ptrno, newblkno, 0);
1739	ACQUIRE_LOCK(&lk);
1740	WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
1741	FREE_LOCK(&lk);
1742 setup_allocindir_phase2(bp, ip, aip);
1743}
1744
1745/*
1746 * Called to finish the allocation of the "aip" allocated
1747 * by one of the two routines above.
1748 */
1749/* in-memory copy of the indirect block */
1750/* inode for file being extended */
1751/* allocindir allocated by the above routines */
1752STATIC void
1753setup_allocindir_phase2(struct buf *bp, struct inode *ip,
1754 struct allocindir *aip)
1755{
1756 struct worklist *wk;
1757 struct indirdep *indirdep, *newindirdep;
1758 struct bmsafemap *bmsafemap;
1759 struct allocindir *oldaip;
1760 struct freefrag *freefrag;
1761 struct newblk *newblk;
1762
1763 if (bp->b_lblkno >= 0)
1764 panic("setup_allocindir_phase2: not indir blk");
1765	for (indirdep = NULL, newindirdep = NULL; ; ) {
1766		ACQUIRE_LOCK(&lk);
1767		LIST_FOREACH(wk, &bp->b_dep, wk_list) {
1768			if (wk->wk_type != D_INDIRDEP)
1769				continue;
1770			indirdep = WK_INDIRDEP(wk);
1771			break;
1772		}
1773		if (indirdep == NULL && newindirdep) {
1774			indirdep = newindirdep;
1775			WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
1776			newindirdep = NULL;
1777		}
1778		FREE_LOCK(&lk);
1779 if (indirdep) {
1780			if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
1781			    &newblk) == 0)
1782				panic("setup_allocindir: lost block");
1783			ACQUIRE_LOCK(&lk);
1784			if (newblk->nb_state == DEPCOMPLETE) {
1785				aip->ai_state |= DEPCOMPLETE;
1786				aip->ai_buf = NULL;
1787			} else {
1788				bmsafemap = newblk->nb_bmsafemap;
1789				aip->ai_buf = bmsafemap->sm_buf;
1790				LIST_REMOVE(newblk, nb_deps);
1791				LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
1792				    aip, ai_deps);
1793			}
1794			LIST_REMOVE(newblk, nb_hash);
1795			pool_put(&newblk_pool, newblk);
1796			aip->ai_indirdep = indirdep;
1797			/*
1798			 * Check to see if there is an existing dependency
1799			 * for this block. If there is, merge the old
1800			 * dependency into the new one.
1801			 */
1802			if (aip->ai_oldblkno == 0)
1803				oldaip = NULL;
1804			else
1805
1806				LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
1807					if (oldaip->ai_offset == aip->ai_offset)
1808						break;
1809			freefrag = NULL;
1810			if (oldaip != NULL) {
1811				if (oldaip->ai_newblkno != aip->ai_oldblkno) {
1812					FREE_LOCK(&lk);
1813					panic("setup_allocindir_phase2: blkno");
1814				}
1815				aip->ai_oldblkno = oldaip->ai_oldblkno;
1816				freefrag = aip->ai_freefrag;
1817				aip->ai_freefrag = oldaip->ai_freefrag;
1818				oldaip->ai_freefrag = NULL;
1819				free_allocindir(oldaip, NULL);
1820			}
1821			LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
1822			if (ip->i_ump->um_fstype == UM_UFS1)
1823				((int32_t *)indirdep->ir_savebp->b_data)
1824				[aip->ai_offset] = aip->ai_oldblkno;
1825			else
1826				((int64_t *)indirdep->ir_savebp->b_data)
1827				[aip->ai_offset] = aip->ai_oldblkno;
1828			FREE_LOCK(&lk);
1829			if (freefrag != NULL)
1830				handle_workitem_freefrag(freefrag);
1831 }
1832 if (newindirdep) {
1833			if (indirdep->ir_savebp != NULL)
1834				brelse(newindirdep->ir_savebp);
1835			WORKITEM_FREE(newindirdep, D_INDIRDEP);
1836		}
1837		if (indirdep)
1838			break;
1839		newindirdep = pool_get(&indirdep_pool, PR_WAITOK);
1840		newindirdep->ir_list.wk_type = D_INDIRDEP;
1841		newindirdep->ir_state = ATTACHED;
1842		if (ip->i_ump->um_fstype == UM_UFS1)
1843			newindirdep->ir_state |= UFS1FMT;
1844		LIST_INIT(&newindirdep->ir_deplisthd);
1845		LIST_INIT(&newindirdep->ir_donehd);
1846		if (bp->b_blkno == bp->b_lblkno) {
1847			VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno,
1848			    NULL);
1849		}
1850		newindirdep->ir_savebp =
1851		    getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, INFSLP);
1852#if 0
1853		BUF_KERNPROC(newindirdep->ir_savebp);
1854#endif
1855		memcpy(newindirdep->ir_savebp->b_data, bp->b_data, bp->b_bcount);
1856 }
1857}
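
The rollback store near the end of the loop above (lines 1822-1827) writes
the old pointer into the save buffer at a width that depends on the
filesystem format: UFS1 keeps 32-bit block pointers, UFS2 64-bit. A
standalone sketch of that dual-width store, with hypothetical format
constants in place of the kernel's:

#include <stdint.h>
#include <stdio.h>

#define FMT_UFS1 1
#define FMT_UFS2 2

/* store a block pointer at index ptrno, using the on-disk width */
static void
store_ptr(void *data, int fmt, int ptrno, int64_t blkno)
{
	if (fmt == FMT_UFS1)
		((int32_t *)data)[ptrno] = (int32_t)blkno;
	else
		((int64_t *)data)[ptrno] = blkno;
}

int
main(void)
{
	int64_t block[8] = { 0 };	/* backing storage, 8-byte aligned */

	store_ptr(block, FMT_UFS1, 3, 1234);
	printf("ufs1 slot 3: %d\n", ((int32_t *)block)[3]);
	store_ptr(block, FMT_UFS2, 3, 5678);
	printf("ufs2 slot 3: %lld\n", (long long)block[3]);
	return 0;
}
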
1858
1859/*
1860 * Block de-allocation dependencies.
1861 *
1862 * When blocks are de-allocated, the on-disk pointers must be nullified before
1863 * the blocks are made available for use by other files. (The true
1864 * requirement is that old pointers must be nullified before new on-disk
1865 * pointers are set. We chose this slightly more stringent requirement to
1866 * reduce complexity.) Our implementation handles this dependency by updating
1867 * the inode (or indirect block) appropriately but delaying the actual block
1868 * de-allocation (i.e., freemap and free space count manipulation) until
1869 * after the updated versions reach stable storage. After the disk is
1870 * updated, the blocks can be safely de-allocated whenever it is convenient.
1871 * This implementation handles only the common case of reducing a file's
1872 * length to zero. Other cases are handled by the conventional synchronous
1873 * write approach.
1874 *
1875 * The ffs implementation with which we worked double-checks
1876 * the state of the block pointers and file size as it reduces
1877 * a file's length. Some of this code is replicated here in our
1878 * soft updates implementation. The freeblks->fb_chkcnt field is
1879 * used to transfer a part of this information to the procedure
1880 * that eventually de-allocates the blocks.
1881 *
1882 * This routine should be called from the routine that shortens
1883 * a file's length, before the inode's size or block pointers
1884 * are modified. It will save the block pointer information for
1885 * later release and zero the inode so that the calling routine
1886 * can release it.
1887 */
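
The delayed de-allocation described above reduces to: save the pointers, null
them in the inode, and free the blocks only from a step that runs after the
zero'ed inode has reached the disk. A toy userland model of that ordering,
with all names hypothetical:

#include <stdio.h>

static long pointers[3] = { 7, 8, 9 };	/* in-core block pointers */
static long deferred[3];		/* saved for later release */
static int ndeferred;

/* runs once the zero'ed inode is known to be on stable storage */
static void
inode_written_to_disk(void)
{
	int i;

	/* only now is it safe to mark the blocks free */
	for (i = 0; i < ndeferred; i++)
		printf("freeing block %ld\n", deferred[i]);
	ndeferred = 0;
}

int
main(void)
{
	int i;

	/* save and null the pointers first ... */
	for (i = 0; i < 3; i++) {
		deferred[ndeferred++] = pointers[i];
		pointers[i] = 0;
	}
	/* ... the actual free waits for the inode write to complete */
	inode_written_to_disk();
	return 0;
}
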
1888/* The inode whose length is to be reduced */
1889/* The new length for the file */
1890void
1891softdep_setup_freeblocks(struct inode *ip, off_t length)
1892{
1893 struct freeblks *freeblks;
1894 struct inodedep *inodedep;
1895 struct allocdirect *adp;
1896 struct vnode *vp;
1897 struct buf *bp;
1898 struct fs *fs;
1899 int i, delay, error;
1900
1901	fs = ip->i_fs;
1902	if (length != 0)
1903		panic("softdep_setup_freeblocks: non-zero length");
1904	freeblks = pool_get(&freeblks_pool, PR_WAITOK | PR_ZERO);
1905 freeblks->fb_list.wk_type = D_FREEBLKS8;
1906 freeblks->fb_statefb_list.wk_state = ATTACHED0x0001;
1907 freeblks->fb_uid = DIP(ip, uid)(((ip)->i_ump->um_fstype == 1) ? (ip)->dinode_u.ffs1_din
->di_uid : (ip)->dinode_u.ffs2_din->di_uid)
;
1908 freeblks->fb_previousinum = ip->i_number;
1909 freeblks->fb_devvp = ip->i_devvpi_ump->um_devvp;
1910 freeblks->fb_mnt = ITOV(ip)((ip)->i_vnode)->v_mount;
1911 freeblks->fb_oldsize = DIP(ip, size)(((ip)->i_ump->um_fstype == 1) ? (ip)->dinode_u.ffs1_din
->di_size : (ip)->dinode_u.ffs2_din->di_size)
;
1912 freeblks->fb_newsize = length;
1913 freeblks->fb_chkcnt = DIP(ip, blocks)(((ip)->i_ump->um_fstype == 1) ? (ip)->dinode_u.ffs1_din
->di_blocks : (ip)->dinode_u.ffs2_din->di_blocks)
;
1914
1915 for (i = 0; i < NDADDR12; i++) {
1916 freeblks->fb_dblks[i] = DIP(ip, db[i])(((ip)->i_ump->um_fstype == 1) ? (ip)->dinode_u.ffs1_din
->di_db[i] : (ip)->dinode_u.ffs2_din->di_db[i])
;
1917 DIP_ASSIGN(ip, db[i], 0)do { if ((ip)->i_ump->um_fstype == 1) (ip)->dinode_u
.ffs1_din->di_db[i] = (0); else (ip)->dinode_u.ffs2_din
->di_db[i] = (0); } while (0)
;
1918 }
1919
1920 for (i = 0; i < NIADDR3; i++) {
1921 freeblks->fb_iblks[i] = DIP(ip, ib[i])(((ip)->i_ump->um_fstype == 1) ? (ip)->dinode_u.ffs1_din
->di_ib[i] : (ip)->dinode_u.ffs2_din->di_ib[i])
;
1922 DIP_ASSIGN(ip, ib[i], 0)do { if ((ip)->i_ump->um_fstype == 1) (ip)->dinode_u
.ffs1_din->di_ib[i] = (0); else (ip)->dinode_u.ffs2_din
->di_ib[i] = (0); } while (0)
;
1923 }
1924
1925 DIP_ASSIGN(ip, blocks, 0)do { if ((ip)->i_ump->um_fstype == 1) (ip)->dinode_u
.ffs1_din->di_blocks = (0); else (ip)->dinode_u.ffs2_din
->di_blocks = (0); } while (0)
;
1926 DIP_ASSIGN(ip, size, 0)do { if ((ip)->i_ump->um_fstype == 1) (ip)->dinode_u
.ffs1_din->di_size = (0); else (ip)->dinode_u.ffs2_din->
di_size = (0); } while (0)
;
1927
1928 /*
1929 * Push the zero'ed inode to its disk buffer so that we are free
1930 * to delete its dependencies below. Once the dependencies are gone
1931 * the buffer can be safely released.
1932 */
1933	if ((error = bread(ip->i_devvp,
1934	    fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
1935	    (int)fs->fs_bsize, &bp)) != 0)
1936		softdep_error("softdep_setup_freeblocks", error);
1937
1938	if (ip->i_ump->um_fstype == UM_UFS1)
1939		*((struct ufs1_dinode *) bp->b_data +
1940		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
1941	else
1942		*((struct ufs2_dinode *) bp->b_data +
1943		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
1944
1945 /*
1946 * Find and eliminate any inode dependencies.
1947 */
1948	ACQUIRE_LOCK(&lk);
1949	(void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
1950	if ((inodedep->id_state & IOSTARTED) != 0) {
1951		FREE_LOCK(&lk);
1952		panic("softdep_setup_freeblocks: inode busy");
1953	}
1954 /*
1955 * Add the freeblks structure to the list of operations that
1956 * must await the zero'ed inode being written to disk. If we
1957 * still have a bitmap dependency (delay == 0), then the inode
1958 * has never been written to disk, so we can process the
1959 * freeblks below once we have deleted the dependencies.
1960 */
1961	delay = (inodedep->id_state & DEPCOMPLETE);
1962	if (delay)
1963		WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
1964 /*
1965 * Because the file length has been truncated to zero, any
1966 * pending block allocation dependency structures associated
1967 * with this inode are obsolete and can simply be de-allocated.
1968 * We must first merge the two dependency lists to get rid of
1969 * any duplicate freefrag structures, then purge the merged list.
1970 * If we still have a bitmap dependency, then the inode has never
1971 * been written to disk, so we can free any fragments without delay.
1972 */
1973 merge_inode_lists(inodedep);
1974	while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
1975		free_allocdirect(&inodedep->id_inoupdt, adp, delay);
1976	FREE_LOCK(&lk);
1977 bdwrite(bp);
1978 /*
1979 * We must wait for any I/O in progress to finish so that
1980 * all potential buffers on the dirty list will be visible.
1981 * Once they are all there, walk the list and get rid of
1982 * any dependencies.
1983 */
1984	vp = ITOV(ip);
1985	ACQUIRE_LOCK(&lk);
1986	drain_output(vp, 1);
1987	while ((bp = LIST_FIRST(&vp->v_dirtyblkhd))) {
1988		if (getdirtybuf(bp, MNT_WAIT) <= 0)
1989			break;
1990		(void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
1991		deallocate_dependencies(bp, inodedep);
1992		bp->b_flags |= B_INVAL | B_NOCACHE;
1993		FREE_LOCK(&lk);
1994		brelse(bp);
1995		ACQUIRE_LOCK(&lk);
1996	}
1997	if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
1998		(void) free_inodedep(inodedep);
1999
2000	if (delay) {
2001		freeblks->fb_state |= DEPCOMPLETE;
2002 /*
2003 * If the inode with zeroed block pointers is now on disk we
2004 * can start freeing blocks. Add freeblks to the worklist
2005 * instead of calling handle_workitem_freeblocks() directly as
2006 * it is more likely that additional IO is needed to complete
2007 * the request than in the !delay case.
2008 */
2009		if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
2010			add_to_worklist(&freeblks->fb_list);
2011	}
2012
2013	FREE_LOCK(&lk);
2014 /*
2015 * If the inode has never been written to disk (delay == 0),
2016 * then we can process the freeblks now that we have deleted
2017 * the dependencies.
2018 */
2019 if (!delay)
2020 handle_workitem_freeblocks(freeblks);
2021}
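
The save-and-zero loops in this function go through DIP()/DIP_ASSIGN(), which
select the UFS1 or UFS2 on-disk inode at run time. A self-contained sketch of
that accessor pattern, using simplified stand-in structures rather than the
real dinode layouts:

#include <stdint.h>
#include <stdio.h>

#define NDADDR 12

struct din1 { int32_t db[NDADDR]; };	/* stand-in for ufs1_dinode */
struct din2 { int64_t db[NDADDR]; };	/* stand-in for ufs2_dinode */

struct ino {
	int fstype;		/* 1 = UFS1, 2 = UFS2 */
	struct din1 *d1;
	struct din2 *d2;
};

/* DIP-style accessors choosing the on-disk format at run time */
#define DIP_DB(ip, i) \
	((ip)->fstype == 1 ? (int64_t)(ip)->d1->db[i] : (ip)->d2->db[i])
#define DIP_DB_ASSIGN(ip, i, v) do {					\
	if ((ip)->fstype == 1)						\
		(ip)->d1->db[i] = (int32_t)(v);				\
	else								\
		(ip)->d2->db[i] = (v);					\
} while (0)

int
main(void)
{
	struct din1 d1 = { { 5, 6, 7 } };
	struct ino ip = { 1, &d1, 0 };
	int64_t saved[NDADDR];
	int i;

	/* save each pointer, then zero it in the inode, as above */
	for (i = 0; i < NDADDR; i++) {
		saved[i] = DIP_DB(&ip, i);
		DIP_DB_ASSIGN(&ip, i, 0);
	}
	printf("saved[2]=%lld, now %lld\n",
	    (long long)saved[2], (long long)DIP_DB(&ip, 2));
	return 0;
}
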
2022
2023/*
2024 * Reclaim any dependency structures from a buffer that is about to
2025 * be reallocated to a new vnode. The buffer must be locked, thus,
2026 * no I/O completion operations can occur while we are manipulating
2027 * its associated dependencies. The mutex is held so that other I/O's
2028 * associated with related dependencies do not occur.
2029 */
2030STATIC void
2031deallocate_dependencies(struct buf *bp, struct inodedep *inodedep)
2032{
2033 struct worklist *wk;
2034 struct indirdep *indirdep;
2035 struct allocindir *aip;
2036 struct pagedep *pagedep;
2037 struct dirrem *dirrem;
2038 struct diradd *dap;
2039 int i;
2040
2041	while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
2042		switch (wk->wk_type) {
2043
2044		case D_INDIRDEP:
2045			indirdep = WK_INDIRDEP(wk);
2046 /*
2047 * None of the indirect pointers will ever be visible,
2048 * so they can simply be tossed. GOINGAWAY ensures
2049 * that allocated pointers will be saved in the buffer
2050 * cache until they are freed. Note that they will
2051 * only be able to be found by their physical address
2052 * since the inode mapping the logical address will
2053 * be gone. The save buffer used for the safe copy
2054 * was allocated in setup_allocindir_phase2 using
2055 * the physical address so it could be used for this
2056 * purpose. Hence we swap the safe copy with the real
2057 * copy, allowing the safe copy to be freed and holding
2058 * on to the real copy for later use in indir_trunc.
2059 */
2060			if (indirdep->ir_state & GOINGAWAY) {
2061				FREE_LOCK(&lk);
2062				panic("deallocate_dependencies: already gone");
2063			}
2064			indirdep->ir_state |= GOINGAWAY;
2065			while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)))
2066				free_allocindir(aip, inodedep);
2067			if (bp->b_lblkno >= 0 ||
2068			    bp->b_blkno != indirdep->ir_savebp->b_lblkno) {
2069				FREE_LOCK(&lk);
2070				panic("deallocate_dependencies: not indir");
2071			}
2072			memcpy(indirdep->ir_savebp->b_data, bp->b_data,
2073			    bp->b_bcount);
2074			WORKLIST_REMOVE(wk);
2075			WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk);
2076			continue;
2077
2078		case D_PAGEDEP:
2079			pagedep = WK_PAGEDEP(wk);
2080 /*
2081 * None of the directory additions will ever be
2082 * visible, so they can simply be tossed.
2083 */
2084			for (i = 0; i < DAHASHSZ; i++)
2085				while ((dap =
2086				    LIST_FIRST(&pagedep->pd_diraddhd[i])))
2087					free_diradd(dap);
2088			while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)))
2089 free_diradd(dap);
2090 /*
2091 * Copy any directory remove dependencies to the list
2092 * to be processed after the zero'ed inode is written.
2093 * If the inode has already been written, then they
2094 * can be dumped directly onto the work list.
2095 */
2096			while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd))) {
2097				LIST_REMOVE(dirrem, dm_next);
2098				dirrem->dm_dirinum = pagedep->pd_ino;
2099				if (inodedep == NULL ||
2100				    (inodedep->id_state & ALLCOMPLETE) ==
2101				    ALLCOMPLETE)
2102					add_to_worklist(&dirrem->dm_list);
2103				else
2104					WORKLIST_INSERT(&inodedep->id_bufwait,
2105					    &dirrem->dm_list);
2106 }
2107			if ((pagedep->pd_state & NEWBLOCK) != 0) {
2108				LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list)
2109					if (wk->wk_type == D_NEWDIRBLK &&
2110					    WK_NEWDIRBLK(wk)->db_pagedep ==
2111					    pagedep)
2112						break;
2113				if (wk != NULL) {
2114					WORKLIST_REMOVE(wk);
2115					free_newdirblk(WK_NEWDIRBLK(wk));
2116				} else {
2117					FREE_LOCK(&lk);
2118					panic("deallocate_dependencies: "
2119					    "lost pagedep");
2120				}
2121 }
2122			WORKLIST_REMOVE(&pagedep->pd_list);
2123			LIST_REMOVE(pagedep, pd_hash);
2124			WORKITEM_FREE(pagedep, D_PAGEDEP);
2125 continue;
2126
2127		case D_ALLOCINDIR:
2128			free_allocindir(WK_ALLOCINDIR(wk), inodedep);
2129			continue;
2130
2131		case D_ALLOCDIRECT:
2132		case D_INODEDEP:
2133			FREE_LOCK(&lk);
2134			panic("deallocate_dependencies: Unexpected type %s",
2135			    TYPENAME(wk->wk_type));
2136			/* NOTREACHED */
2137
2138		default:
2139			FREE_LOCK(&lk);
2140			panic("deallocate_dependencies: Unknown type %s",
2141			    TYPENAME(wk->wk_type));
2142 /* NOTREACHED */
2143 }
2144 }
2145}
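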
2146
2147/*
2148 * Free an allocdirect. Generate a new freefrag work request if appropriate.
2149 * This routine must be called with splbio interrupts blocked.
2150 */
2151STATIC void
2152free_allocdirect(struct allocdirectlst *adphead, struct allocdirect *adp,
2153 int delay)
2154{
2155 struct newdirblk *newdirblk;
2156 struct worklist *wk;
2157
2158	splassert(IPL_BIO);
2159
2160#ifdef DEBUG
2161 if (lk.lkt_held == -1)
2162 panic("free_allocdirect: lock not held");
2163#endif
2164	if ((adp->ad_state & DEPCOMPLETE) == 0)
2165		LIST_REMOVE(adp, ad_deps);
2166	TAILQ_REMOVE(adphead, adp, ad_next);
2167	if ((adp->ad_state & COMPLETE) == 0)
2168		WORKLIST_REMOVE(&adp->ad_list);
2169	if (adp->ad_freefrag != NULL) {
2170		if (delay)
2171			WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
2172			    &adp->ad_freefrag->ff_list);
2173 else
2174 add_to_worklist(&adp->ad_freefrag->ff_list);
2175 }
2176	if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) {
2177		newdirblk = WK_NEWDIRBLK(wk);
2178		WORKLIST_REMOVE(&newdirblk->db_list);
2179		if (LIST_FIRST(&adp->ad_newdirblk) != NULL)
2180			panic("free_allocdirect: extra newdirblk");
2181		if (delay)
2182			WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
2183			    &newdirblk->db_list);
2184 else
2185 free_newdirblk(newdirblk);
2186 }
2187	WORKITEM_FREE(adp, D_ALLOCDIRECT);
2188}
2189
2190/*
2191 * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
2192 * This routine must be called with splbio interrupts blocked.
2193 */
2194void
2195free_newdirblk(struct newdirblk *newdirblk)
2196{
2197 struct pagedep *pagedep;
2198 struct diradd *dap;
2199 int i;
2200
2201	splassert(IPL_BIO);
2202
2203#ifdef DEBUG
2204 if (lk.lkt_held == -1)
2205 panic("free_newdirblk: lock not held");
2206#endif
2207 /*
2208 * If the pagedep is still linked onto the directory buffer
2209 * dependency chain, then some of the entries on the
2210 * pd_pendinghd list may not be committed to disk yet. In
2211 * this case, we will simply clear the NEWBLOCK flag and
2212 * let the pd_pendinghd list be processed when the pagedep
2213 * is next written. If the pagedep is no longer on the buffer
2214 * dependency chain, then all the entries on the pd_pending
2215 * list are committed to disk and we can free them here.
2216 */
2217 pagedep = newdirblk->db_pagedep;
2218	pagedep->pd_state &= ~NEWBLOCK;
2219	if ((pagedep->pd_state & ONWORKLIST) == 0)
2220		while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
2221 free_diradd(dap);
2222 /*
2223 * If no dependencies remain, the pagedep will be freed.
2224 */
2225	for (i = 0; i < DAHASHSZ; i++)
2226		if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
2227			break;
2228	if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) {
2229		LIST_REMOVE(pagedep, pd_hash);
2230		WORKITEM_FREE(pagedep, D_PAGEDEP);
2231	}
2232	WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
2233}
2234
2235/*
2236 * Prepare an inode to be freed. The actual free operation is not
2237 * done until the zero'ed inode has been written to disk.
2238 */
2239void
2240softdep_freefile(struct vnode *pvp, ufsino_t ino, mode_t mode)
2241{
2242	struct inode *ip = VTOI(pvp);
2243 struct inodedep *inodedep;
2244 struct freefile *freefile;
2245
2246 /*
2247 * This sets up the inode de-allocation dependency.
2248 */
2249	freefile = pool_get(&freefile_pool, PR_WAITOK);
2250	freefile->fx_list.wk_type = D_FREEFILE;
2251	freefile->fx_list.wk_state = 0;
2252	freefile->fx_mode = mode;
2253	freefile->fx_oldinum = ino;
2254	freefile->fx_devvp = ip->i_devvp;
2255	freefile->fx_mnt = ITOV(ip)->v_mount;
2256
2257 /*
2258 * If the inodedep does not exist, then the zero'ed inode has
2259 * been written to disk. If the allocated inode has never been
2260 * written to disk, then the on-disk inode is zero'ed. In either
2261 * case we can free the file immediately.
2262 */
2263	ACQUIRE_LOCK(&lk);
2264	if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 ||
2265	    check_inode_unwritten(inodedep)) {
2266		FREE_LOCK(&lk);
2267 handle_workitem_freefile(freefile);
2268 return;
2269 }
2270	WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
2271	FREE_LOCK(&lk);
2272}
2273
2274/*
2275 * Check to see if an inode has never been written to disk. If
2276 * so free the inodedep and return success, otherwise return failure.
2277 * This routine must be called with splbio interrupts blocked.
2278 *
2279 * If we still have a bitmap dependency, then the inode has never
2280 * been written to disk. Drop the dependency as it is no longer
2281 * necessary since the inode is being deallocated. We set the
2282 * ALLCOMPLETE flags since the bitmap now properly shows that the
2283 * inode is not allocated. Even if the inode is actively being
2284 * written, it has been rolled back to its zero'ed state, so we
2285 * are ensured that a zero inode is what is on the disk. For short
2286 * lived files, this change will usually result in removing all the
2287 * dependencies from the inode so that it can be freed immediately.
2288 */
2289STATIC int
2290check_inode_unwritten(struct inodedep *inodedep)
2291{
2292	splassert(IPL_BIO);
2293
2294	if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
2295	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
2296	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
2297	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
2298	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
2299	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
2300	    inodedep->id_nlinkdelta != 0)
2301		return (0);
2302	inodedep->id_state |= ALLCOMPLETE;
2303	LIST_REMOVE(inodedep, id_deps);
2304	inodedep->id_buf = NULL;
2305	if (inodedep->id_state & ONWORKLIST)
2306		WORKLIST_REMOVE(&inodedep->id_list);
2307	if (inodedep->id_savedino1 != NULL) {
2308		free(inodedep->id_savedino1, M_INODEDEP, inodedep->id_unsize);
2309		inodedep->id_savedino1 = NULL;
2310	}
2311	if (free_inodedep(inodedep) == 0) {
2312		FREE_LOCK(&lk);
2313		panic("check_inode_unwritten: busy inode");
2314	}
2315	return (1);
2316}
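/*
 * Editor's note -- illustrative sketch, not part of ffs_softdep.c.
 * Dependency state lives in a single flag word; ALLCOMPLETE is the
 * conjunction ATTACHED | COMPLETE | DEPCOMPLETE (0x0001 | 0x0004 |
 * 0x0008 in this kernel). Testing "fully complete", as free_inodedep()
 * below does, therefore needs a mask-and-compare, not a simple bit
 * test:
 */
#include <stdio.h>

#define ATTACHED	0x0001
#define COMPLETE	0x0004
#define DEPCOMPLETE	0x0008
#define ALLCOMPLETE	(ATTACHED | COMPLETE | DEPCOMPLETE)

int
main(void)
{
	unsigned int state = ATTACHED | DEPCOMPLETE;

	/* Wrong test: passes if any one bit is set. */
	printf("any bit:  %d\n", (state & ALLCOMPLETE) != 0);
	/* Right test: passes only when all three bits are set. */
	printf("all bits: %d\n", (state & ALLCOMPLETE) == ALLCOMPLETE);
	state |= COMPLETE;
	printf("all bits: %d\n", (state & ALLCOMPLETE) == ALLCOMPLETE);
	return 0;
}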
2317
2318/*
2319 * Try to free an inodedep structure. Return 1 if it could be freed.
2320 */
2321STATIC int
2322free_inodedep(struct inodedep *inodedep)
2323{
2324
2325	if ((inodedep->id_state & ONWORKLIST) != 0 ||
2326	    (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
2327	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
2328	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
2329	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
2330	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
2331	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
2332	    inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
2333		return (0);
2334	LIST_REMOVE(inodedep, id_hash);
2335	WORKITEM_FREE(inodedep, D_INODEDEP);
2336 num_inodedep -= 1;
2337 return (1);
2338}
2339
2340/*
2341 * This workitem routine performs the block de-allocation.
2342 * The workitem is added to the pending list after the updated
2343 * inode block has been written to disk. As mentioned above,
2344 * checks regarding the number of blocks de-allocated (compared
2345 * to the number of blocks allocated for the file) are also
2346 * performed in this function.
2347 */
2348STATIC void
2349handle_workitem_freeblocks(struct freeblks *freeblks)
2350{
2351 struct inode tip;
2352 daddr_t bn;
2353 union {
2354 struct ufs1_dinode di1;
2355 struct ufs2_dinode di2;
2356 } di;
2357 struct fs *fs;
2358 int i, level, bsize;
2359 long nblocks, blocksreleased = 0;
2360 int error, allerror = 0;
2361	daddr_t baselbns[NIADDR], tmpval;
2362
2363	if (VFSTOUFS(freeblks->fb_mnt)->um_fstype == UM_UFS1)
2364		tip.i_din1 = &di.di1;
2365	else
2366		tip.i_din2 = &di.di2;
2367
2368	tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
2369	tip.i_number = freeblks->fb_previousinum;
2370	tip.i_ump = VFSTOUFS(freeblks->fb_mnt);
2371	tip.i_dev = freeblks->fb_devvp->v_rdev;
2372	DIP_ASSIGN(&tip, size, freeblks->fb_oldsize);
2373	DIP_ASSIGN(&tip, uid, freeblks->fb_uid);
2374	tip.i_vnode = NULL;
2375	tmpval = 1;
2376	baselbns[0] = NDADDR;
2377	for (i = 1; i < NIADDR; i++) {
2378		tmpval *= NINDIR(fs);
2379		baselbns[i] = baselbns[i - 1] + tmpval;
2380	}
2381	nblocks = btodb(fs->fs_bsize);
2382	blocksreleased = 0;
2383	/*
2384	 * Indirect blocks first.
2385	 */
2386	for (level = (NIADDR - 1); level >= 0; level--) {
2387		if ((bn = freeblks->fb_iblks[level]) == 0)
2388			continue;
2389		if ((error = indir_trunc(&tip, fsbtodb(fs, bn), level,
2390		    baselbns[level], &blocksreleased)) != 0)
2391			allerror = error;
2392		ffs_blkfree(&tip, bn, fs->fs_bsize);
2393		blocksreleased += nblocks;
2394	}
2395	/*
2396	 * All direct blocks or frags.
2397	 */
2398	for (i = (NDADDR - 1); i >= 0; i--) {
2399		if ((bn = freeblks->fb_dblks[i]) == 0)
2400			continue;
2401		bsize = blksize(fs, &tip, i);
2402		ffs_blkfree(&tip, bn, bsize);
2403		blocksreleased += btodb(bsize);
2404	}
2405
2406#ifdef DIAGNOSTIC
2407	if (freeblks->fb_chkcnt != blocksreleased)
2408		printf("handle_workitem_freeblocks: block count\n");
2409	if (allerror)
2410		softdep_error("handle_workitem_freeblks", allerror);
2411#endif /* DIAGNOSTIC */
2412	WORKITEM_FREE(freeblks, D_FREEBLKS);
2413}
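/*
 * Editor's note -- illustrative sketch, not part of ffs_softdep.c.
 * The baselbns[] computation above records the first logical block
 * number served by each level of indirection: level 0 starts right
 * after the NDADDR direct blocks, and each deeper level starts
 * NINDIR(fs)^level blocks later. Assuming NDADDR = 12, NIADDR = 3,
 * and a hypothetical NINDIR of 2048:
 */
#include <stdio.h>

#define NDADDR	12
#define NIADDR	3

int
main(void)
{
	long long baselbns[NIADDR], tmpval = 1, nindir = 2048;
	int i;

	baselbns[0] = NDADDR;
	for (i = 1; i < NIADDR; i++) {
		tmpval *= nindir;
		baselbns[i] = baselbns[i - 1] + tmpval;
	}
	for (i = 0; i < NIADDR; i++)
		printf("level %d starts at lbn %lld\n", i, baselbns[i]);
	/* Prints 12, 2060, 4196364 for NINDIR = 2048. */
	return 0;
}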
2414
2415/*
2416 * Release blocks associated with the inode ip and stored in the indirect
2417 * block dbn. If level is greater than SINGLE, the block is an indirect block
2418 * and recursive calls to indirtrunc must be used to cleanse other indirect
2419 * blocks.
2420 */
2421STATIC int
2422indir_trunc(struct inode *ip, daddr_t dbn, int level, daddr_t lbn,
2423 long *countp)
2424{
2425 struct buf *bp;
2426	int32_t *bap1 = NULL;
2427	int64_t nb, *bap2 = NULL;
2428	struct fs *fs;
2429	struct worklist *wk;
2430	struct indirdep *indirdep;
2431	int i, lbnadd, nblocks, ufs1fmt;
2432	int error, allerror = 0;
2433
2434	fs = ip->i_fs;
2435	lbnadd = 1;
2436	for (i = level; i > 0; i--)
2437		lbnadd *= NINDIR(fs);
2438	/*
2439	 * Get buffer of block pointers to be freed. This routine is not
2440	 * called until the zero'ed inode has been written, so it is safe
2441	 * to free blocks as they are encountered. Because the inode has
2442	 * been zero'ed, calls to bmap on these blocks will fail. So, we
2443	 * have to use the on-disk address and the block device for the
2444	 * filesystem to look them up. If the file was deleted before its
2445	 * indirect blocks were all written to disk, the routine that set
2446	 * us up (deallocate_dependencies) will have arranged to leave
2447	 * a complete copy of the indirect block in memory for our use.
2448	 * Otherwise we have to read the blocks in from the disk.
2449	 */
2450	ACQUIRE_LOCK(&lk);
2451	if ((bp = incore(ip->i_devvp, dbn)) != NULL &&
2452	    (wk = LIST_FIRST(&bp->b_dep)) != NULL) {
2453		if (wk->wk_type != D_INDIRDEP ||
2454		    (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp ||
2455		    (indirdep->ir_state & GOINGAWAY) == 0) {
2456			FREE_LOCK(&lk);
2457			panic("indir_trunc: lost indirdep");
2458		}
2459		WORKLIST_REMOVE(wk);
2460		WORKITEM_FREE(indirdep, D_INDIRDEP);
2461		if (LIST_FIRST(&bp->b_dep) != NULL) {
2462			FREE_LOCK(&lk);
2463			panic("indir_trunc: dangling dep");
2464		}
2465		FREE_LOCK(&lk);
2466	} else {
2467		FREE_LOCK(&lk);
2468		error = bread(ip->i_devvp, dbn, (int)fs->fs_bsize, &bp);
2469		if (error)
2470			return (error);
2471	}
2472	/*
2473	 * Recursively free indirect blocks.
2474	 */
2475	if (ip->i_ump->um_fstype == UM_UFS1) {
2476		ufs1fmt = 1;
2477		bap1 = (int32_t *)bp->b_data;
2478	} else {
2479		ufs1fmt = 0;
2480		bap2 = (int64_t *)bp->b_data;
2481	}
2482	nblocks = btodb(fs->fs_bsize);
2483	for (i = NINDIR(fs) - 1; i >= 0; i--) {
2484		if (ufs1fmt)
2485			nb = bap1[i];
2486		else
2487			nb = bap2[i];
2488		if (nb == 0)
2489			continue;
2490		if (level != 0) {
2491			if ((error = indir_trunc(ip, fsbtodb(fs, nb),
2492			    level - 1, lbn + (i * lbnadd), countp)) != 0)
2493				allerror = error;
2494		}
2495		ffs_blkfree(ip, nb, fs->fs_bsize);
2496		*countp += nblocks;
2497	}
2498	bp->b_flags |= B_INVAL | B_NOCACHE;
2499 brelse(bp);
2500 return (allerror);
2501}
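/*
 * Editor's note -- illustrative sketch, not part of ffs_softdep.c.
 * indir_trunc() walks the indirect-block tree bottom-up: recurse into
 * each child first, then free the block itself. A userspace analogue
 * over a tiny in-memory tree (fanout and depth are made-up
 * parameters) shows the same post-order structure:
 */
#include <stdio.h>

#define FANOUT	4	/* stands in for NINDIR(fs) */

/* Free every block reachable from one indirect block at `level`. */
static long
trunc_level(long blkno, int level)
{
	long freed = 0;
	int i;

	if (level > 0)
		for (i = FANOUT - 1; i >= 0; i--)
			freed += trunc_level(blkno * FANOUT + i + 1, level - 1);
	printf("free block %ld (level %d)\n", blkno, level);
	return freed + 1;
}

int
main(void)
{
	/* A double-indirect block (level 2) frees 1 + 4 + 16 blocks. */
	printf("total freed: %ld\n", trunc_level(0, 2));
	return 0;
}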
2502
2503/*
2504 * Free an allocindir.
2505 * This routine must be called with splbio interrupts blocked.
2506 */
2507STATIC void
2508free_allocindir(struct allocindir *aip, struct inodedep *inodedep)
2509{
2510 struct freefrag *freefrag;
2511
2512	splassert(IPL_BIO);
2513
2514#ifdef DEBUG
2515	if (lk.lkt_held == -1)
2516		panic("free_allocindir: lock not held");
2517#endif
2518	if ((aip->ai_state & DEPCOMPLETE) == 0)
2519		LIST_REMOVE(aip, ai_deps);
2520	if (aip->ai_state & ONWORKLIST)
2521		WORKLIST_REMOVE(&aip->ai_list);
2522	LIST_REMOVE(aip, ai_next);
2523	if ((freefrag = aip->ai_freefrag) != NULL) {
2524		if (inodedep == NULL)
2525			add_to_worklist(&freefrag->ff_list);
2526		else
2527			WORKLIST_INSERT(&inodedep->id_bufwait,
2528			    &freefrag->ff_list);
2529	}
2530	WORKITEM_FREE(aip, D_ALLOCINDIR);
2531}
2532
2533/*
2534 * Directory entry addition dependencies.
2535 *
2536 * When adding a new directory entry, the inode (with its incremented link
2537 * count) must be written to disk before the directory entry's pointer to it.
2538 * Also, if the inode is newly allocated, the corresponding freemap must be
2539 * updated (on disk) before the directory entry's pointer. These requirements
2540 * are met via undo/redo on the directory entry's pointer, which consists
2541 * simply of the inode number.
2542 *
2543 * As directory entries are added and deleted, the free space within a
2544 * directory block can become fragmented. The ufs file system will compact
2545 * a fragmented directory block to make space for a new entry. When this
2546 * occurs, the offsets of previously added entries change. Any "diradd"
2547 * dependency structures corresponding to these entries must be updated with
2548 * the new offsets.
2549 */
2550
2551/*
2552 * This routine is called after the in-memory inode's link
2553 * count has been incremented, but before the directory entry's
2554 * pointer to the inode has been set.
2555 */
2556/* buffer containing directory block */
2557/* inode for directory */
2558/* offset of new entry in directory */
2559/* inode referenced by new directory entry */
2560/* non-NULL => contents of new mkdir */
2561/* entry is in a newly allocated block */
2562int
2563softdep_setup_directory_add(struct buf *bp, struct inode *dp, off_t diroffset,
2564 long newinum, struct buf *newdirbp, int isnewblk)
2565{
2566 int offset; /* offset of new entry within directory block */
2567 daddr_t lbn; /* block in directory containing new entry */
2568 struct fs *fs;
2569 struct diradd *dap;
2570 struct allocdirect *adp;
2571 struct pagedep *pagedep;
2572 struct inodedep *inodedep;
2573	struct newdirblk *newdirblk = NULL;
2574	struct mkdir *mkdir1, *mkdir2;
2575
2576
2577	fs = dp->i_fs;
2578	lbn = lblkno(fs, diroffset);
2579	offset = blkoff(fs, diroffset);
2580	dap = pool_get(&diradd_pool, PR_WAITOK | PR_ZERO);
2581	dap->da_list.wk_type = D_DIRADD;
2582	dap->da_offset = offset;
2583	dap->da_newinum = newinum;
2584	dap->da_state = ATTACHED;
2585	if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) {
2586		newdirblk = pool_get(&newdirblk_pool, PR_WAITOK);
2587		newdirblk->db_list.wk_type = D_NEWDIRBLK;
2588		newdirblk->db_state = 0;
2589	}
2590	if (newdirbp == NULL) {
2591		dap->da_state |= DEPCOMPLETE;
2592		ACQUIRE_LOCK(&lk);
2593	} else {
2594		dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
2595		mkdir1 = pool_get(&mkdir_pool, PR_WAITOK);
2596		mkdir1->md_list.wk_type = D_MKDIR;
2597		mkdir1->md_state = MKDIR_BODY;
2598		mkdir1->md_diradd = dap;
2599		mkdir2 = pool_get(&mkdir_pool, PR_WAITOK);
2600		mkdir2->md_list.wk_type = D_MKDIR;
2601		mkdir2->md_state = MKDIR_PARENT;
2602		mkdir2->md_diradd = dap;
2603		/*
2604		 * Dependency on "." and ".." being written to disk.
2605		 */
2606		mkdir1->md_buf = newdirbp;
2607		ACQUIRE_LOCK(&lk);
2608		LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
2609		WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list);
2610		FREE_LOCK(&lk);
2611		bdwrite(newdirbp);
2612		/*
2613		 * Dependency on link count increase for parent directory
2614		 */
2615		ACQUIRE_LOCK(&lk);
2616		if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0
2617		    || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
2618			dap->da_state &= ~MKDIR_PARENT;
2619			WORKITEM_FREE(mkdir2, D_MKDIR);
2620		} else {
2621			LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
2622			WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list);
2623		}
2624	}
2625	/*
2626	 * Link into parent directory pagedep to await its being written.
2627	 */
2628	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
2629		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
2630	dap->da_pagedep = pagedep;
2631	LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
2632	    da_pdlist);
2633	/*
2634	 * Link into its inodedep. Put it on the id_bufwait list if the inode
2635	 * is not yet written. If it is written, do the post-inode write
2636	 * processing to put it on the id_pendinghd list.
2637	 */
2638	(void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep);
2639	if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
2640		diradd_inode_written(dap, inodedep);
2641	else
2642		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
2643	if (isnewblk) {
2644		/*
2645		 * Directories growing into indirect blocks are rare
2646		 * enough and the frequency of new block allocation
2647		 * in those cases even more rare, that we choose not
2648		 * to bother tracking them. Rather we simply force the
2649		 * new directory entry to disk.
2650		 */
2651		if (lbn >= NDADDR) {
2652			FREE_LOCK(&lk);
2653			/*
2654			 * We only have a new allocation when at the
2655			 * beginning of a new block, not when we are
2656			 * expanding into an existing block.
2657			 */
2658			if (blkoff(fs, diroffset) == 0)
2659				return (1);
2660			return (0);
2661		}
2662		/*
2663		 * We only have a new allocation when at the beginning
2664		 * of a new fragment, not when we are expanding into an
2665		 * existing fragment. Also, there is nothing to do if we
2666		 * are already tracking this block.
2667		 */
2668		if (fragoff(fs, diroffset) != 0) {
2669			FREE_LOCK(&lk);
2670			return (0);
2671		}
2672
2673		if ((pagedep->pd_state & NEWBLOCK) != 0) {
2674			WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
2675			FREE_LOCK(&lk);
2676			return (0);
2677		}
2678		/*
2679		 * Find our associated allocdirect and have it track us.
2680		 */
2681		if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0)
2682			panic("softdep_setup_directory_add: lost inodedep");
2683		adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst);
2684		if (adp == NULL || adp->ad_lbn != lbn) {
2685			FREE_LOCK(&lk);
2686			panic("softdep_setup_directory_add: lost entry");
2687		}
2688		pagedep->pd_state |= NEWBLOCK;
2689		newdirblk->db_pagedep = pagedep;
2690		WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list);
2691	}
2692	FREE_LOCK(&lk);
2693	return (0);
2694}
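/*
 * Editor's note -- illustrative sketch, not part of ffs_softdep.c.
 * softdep_setup_directory_add() splits diroffset three ways: lblkno()
 * (offset >> fs_bshift) names the logical block, blkoff() (offset &
 * fs_qbmask) is the byte offset within that block, and fragoff()
 * (offset & fs_qfmask) is the offset within a fragment -- zero exactly
 * at a fragment boundary, which is what signals a new allocation.
 * With a hypothetical 16 KB block / 2 KB fragment file system:
 */
#include <stdio.h>

int
main(void)
{
	long long bsize = 16384, fsize = 2048;
	long long qbmask = bsize - 1, qfmask = fsize - 1;
	int bshift = 14;			/* log2(16384) */
	long long diroffset = 18432;		/* 16384 + 2048: frag boundary */

	printf("lblkno  = %lld\n", diroffset >> bshift);	/* 1 */
	printf("blkoff  = %lld\n", diroffset & qbmask);		/* 2048 */
	printf("fragoff = %lld\n", diroffset & qfmask);		/* 0 */
	return 0;
}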
2695
2696/*
2697 * This procedure is called to change the offset of a directory
2698 * entry when compacting a directory block which must be owned
2699 * exclusively by the caller. Note that the actual entry movement
2700 * must be done in this procedure to ensure that no I/O completions
2701 * occur while the move is in progress.
2702 */
2703/* inode for directory */
2704/* address of dp->i_offset */
2705/* address of old directory location */
2706/* address of new directory location */
2707/* size of directory entry */
2708void
2709softdep_change_directoryentry_offset(struct inode *dp, caddr_t base,
2710 caddr_t oldloc, caddr_t newloc, int entrysize)
2711{
2712 int offset, oldoffset, newoffset;
2713 struct pagedep *pagedep;
2714 struct diradd *dap;
2715 daddr_t lbn;
2716
2717	ACQUIRE_LOCK(&lk);
2718	lbn = lblkno(dp->i_fs, dp->i_offset);
2719	offset = blkoff(dp->i_fs, dp->i_offset);
2720	if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0)
2721		goto done;
2722	oldoffset = offset + (oldloc - base);
2723	newoffset = offset + (newloc - base);
2724
2725	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) {
2726		if (dap->da_offset != oldoffset)
2727			continue;
2728		dap->da_offset = newoffset;
2729		if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset))
2730			break;
2731		LIST_REMOVE(dap, da_pdlist);
2732		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)],
2733		    dap, da_pdlist);
2734		break;
2735	}
2736	if (dap == NULL) {
2737
2738		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) {
2739			if (dap->da_offset == oldoffset) {
2740				dap->da_offset = newoffset;
2741				break;
2742			}
2743		}
2744	}
2745done:
2746	memmove(newloc, oldloc, entrysize);
2747	FREE_LOCK(&lk);
2748}
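/*
 * Editor's note -- illustrative sketch, not part of ffs_softdep.c.
 * The routine above must sometimes move a diradd between hash chains
 * because DIRADDHASH(offset) is (offset >> 2) % DAHASHSZ with
 * DAHASHSZ = 6: compaction changes the offset, so it can change the
 * bucket too. The offsets below are made up:
 */
#include <stdio.h>

#define DAHASHSZ		6
#define DIRADDHASH(offset)	(((offset) >> 2) % DAHASHSZ)

int
main(void)
{
	int oldoffset = 512, newoffset = 284;

	printf("old bucket %d, new bucket %d, move needed: %d\n",
	    DIRADDHASH(oldoffset), DIRADDHASH(newoffset),
	    DIRADDHASH(oldoffset) != DIRADDHASH(newoffset));
	return 0;
}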
2749
2750/*
2751 * Free a diradd dependency structure. This routine must be called
2752 * with splbio interrupts blocked.
2753 */
2754STATIC void
2755free_diradd(struct diradd *dap)
2756{
2757 struct dirrem *dirrem;
2758 struct pagedep *pagedep;
2759 struct inodedep *inodedep;
2760 struct mkdir *mkdir, *nextmd;
2761
2762	splassert(IPL_BIO);
2763
2764#ifdef DEBUG
2765	if (lk.lkt_held == -1)
2766		panic("free_diradd: lock not held");
2767#endif
2768	WORKLIST_REMOVE(&dap->da_list);
2769	LIST_REMOVE(dap, da_pdlist);
2770	if ((dap->da_state & DIRCHG) == 0) {
2771		pagedep = dap->da_pagedep;
2772	} else {
2773		dirrem = dap->da_previous;
2774		pagedep = dirrem->dm_pagedep;
2775		dirrem->dm_dirinum = pagedep->pd_ino;
2776		add_to_worklist(&dirrem->dm_list);
2777	}
2778	if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum,
2779	    0, &inodedep) != 0)
2780		(void) free_inodedep(inodedep);
2781	if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
2782		for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
2783			nextmd = LIST_NEXT(mkdir, md_mkdirs);
2784			if (mkdir->md_diradd != dap)
2785				continue;
2786			dap->da_state &= ~mkdir->md_state;
2787			WORKLIST_REMOVE(&mkdir->md_list);
2788			LIST_REMOVE(mkdir, md_mkdirs);
2789			WORKITEM_FREE(mkdir, D_MKDIR);
2790		}
2791		if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
2792			FREE_LOCK(&lk);
2793			panic("free_diradd: unfound ref");
2794		}
2795	}
2796	WORKITEM_FREE(dap, D_DIRADD);
2797}
2798
2799/*
2800 * Directory entry removal dependencies.
2801 *
2802 * When removing a directory entry, the entry's inode pointer must be
2803 * zero'ed on disk before the corresponding inode's link count is decremented
2804 * (possibly freeing the inode for re-use). This dependency is handled by
2805 * updating the directory entry but delaying the inode count reduction until
2806 * after the directory block has been written to disk. After this point, the
2807 * inode count can be decremented whenever it is convenient.
2808 */
2809
2810/*
2811 * This routine should be called immediately after removing
2812 * a directory entry. The inode's link count should not be
2813 * decremented by the calling procedure -- the soft updates
2814 * code will do this task when it is safe.
2815 */
2816/* buffer containing directory block */
2817/* inode for the directory being modified */
2818/* inode for directory entry being removed */
2819/* indicates if doing RMDIR */
2820void
2821softdep_setup_remove(struct buf *bp, struct inode *dp, struct inode *ip,
2822 int isrmdir)
2823{
2824 struct dirrem *dirrem, *prevdirrem;
2825
2826 /*
2827 * Allocate a new dirrem if appropriate and ACQUIRE_LOCK.
2828 */
2829 dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
2830
2831 /*
2832 * If the COMPLETE flag is clear, then there were no active
2833 * entries and we want to roll back to a zeroed entry until
2834 * the new inode is committed to disk. If the COMPLETE flag is
2835 * set then we have deleted an entry that never made it to
2836 * disk. If the entry we deleted resulted from a name change,
2837 * then the old name still resides on disk. We cannot delete
2838 * its inode (returned to us in prevdirrem) until the zeroed
2839 * directory entry gets to disk. The new inode has never been
2840 * referenced on the disk, so can be deleted immediately.
2841 */
2842	if ((dirrem->dm_state & COMPLETE) == 0) {
2843		LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
2844		    dm_next);
2845		FREE_LOCK(&lk);
2846	} else {
2847		if (prevdirrem != NULL)
2848			LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
2849			    prevdirrem, dm_next);
2850		dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
2851		FREE_LOCK(&lk);
2852		handle_workitem_remove(dirrem);
2853	}
2854}
2855
2856STATIC long num_dirrem; /* number of dirrem allocated */
2857/*
2858 * Allocate a new dirrem if appropriate and return it along with
2859 * its associated pagedep. Called without a lock, returns with lock.
2860 */
2861/* buffer containing directory block */
2862/* inode for the directory being modified */
2863/* inode for directory entry being removed */
2864/* indicates if doing RMDIR */
2865/* previously referenced inode, if any */
2866STATIC struct dirrem *
2867newdirrem(struct buf *bp, struct inode *dp, struct inode *ip, int isrmdir,
2868 struct dirrem **prevdirremp)
2869{
2870 int offset;
2871 daddr_t lbn;
2872 struct diradd *dap;
2873 struct dirrem *dirrem;
2874 struct pagedep *pagedep;
2875
2876 /*
2877 * Whiteouts have no deletion dependencies.
2878 */
2879	if (ip == NULL)
2880		panic("newdirrem: whiteout");
2881	/*
2882	 * If we are over our limit, try to improve the situation.
2883	 * Limiting the number of dirrem structures will also limit
2884	 * the number of freefile and freeblks structures.
2885	 */
2886	if (num_dirrem > max_softdeps / 2)
2887		(void) request_cleanup(FLUSH_REMOVE, 0);
2888	num_dirrem += 1;
2889	dirrem = pool_get(&dirrem_pool, PR_WAITOK | PR_ZERO);
2890	dirrem->dm_list.wk_type = D_DIRREM;
2891	dirrem->dm_state = isrmdir ? RMDIR : 0;
2892	dirrem->dm_mnt = ITOV(ip)->v_mount;
2893	dirrem->dm_oldinum = ip->i_number;
2894	*prevdirremp = NULL;
2895
2896	ACQUIRE_LOCK(&lk);
2897	lbn = lblkno(dp->i_fs, dp->i_offset);
2898	offset = blkoff(dp->i_fs, dp->i_offset);
2899	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
2900		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
2901	dirrem->dm_pagedep = pagedep;
2902	/*
2903	 * Check for a diradd dependency for the same directory entry.
2904	 * If present, then both dependencies become obsolete and can
2905	 * be de-allocated. Check for an entry on both the pd_diraddhd
2906	 * list and the pd_pendinghd list.
2907	 */
2908
2909	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
2910		if (dap->da_offset == offset)
2911			break;
2912	if (dap == NULL) {
2913
2914		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
2915			if (dap->da_offset == offset)
2916				break;
2917		if (dap == NULL)
2918			return (dirrem);
2919	}
2920	/*
2921	 * Must be ATTACHED at this point.
2922	 */
2923	if ((dap->da_state & ATTACHED) == 0) {
2924		FREE_LOCK(&lk);
2925		panic("newdirrem: not ATTACHED");
2926	}
2927	if (dap->da_newinum != ip->i_number) {
2928		FREE_LOCK(&lk);
2929		panic("newdirrem: inum %u should be %u",
2930		    ip->i_number, dap->da_newinum);
2931	}
2932	/*
2933	 * If we are deleting a changed name that never made it to disk,
2934	 * then return the dirrem describing the previous inode (which
2935	 * represents the inode currently referenced from this entry on disk).
2936	 */
2937	if ((dap->da_state & DIRCHG) != 0) {
2938		*prevdirremp = dap->da_previous;
2939		dap->da_state &= ~DIRCHG;
2940		dap->da_pagedep = pagedep;
2941	}
2942	/*
2943	 * We are deleting an entry that never made it to disk.
2944	 * Mark it COMPLETE so we can delete its inode immediately.
2945	 */
2946	dirrem->dm_state |= COMPLETE;
2947 free_diradd(dap);
2948 return (dirrem);
2949}
2950
2951/*
2952 * Directory entry change dependencies.
2953 *
2954 * Changing an existing directory entry requires that an add operation
2955 * be completed first followed by a deletion. The semantics for the addition
2956 * are identical to the description of adding a new entry above except
2957 * that the rollback is to the old inode number rather than zero. Once
2958 * the addition dependency is completed, the removal is done as described
2959 * in the removal routine above.
2960 */
2961
2962/*
2963 * This routine should be called immediately after changing
2964 * a directory entry. The inode's link count should not be
2965 * decremented by the calling procedure -- the soft updates
2966 * code will perform this task when it is safe.
2967 */
2968/* buffer containing directory block */
2969/* inode for the directory being modified */
2970/* inode for directory entry being removed */
2971/* new inode number for changed entry */
2972/* indicates if doing RMDIR */
2973void
2974softdep_setup_directory_change(struct buf *bp, struct inode *dp,
2975 struct inode *ip, long newinum, int isrmdir)
2976{
2977 int offset;
2978 struct diradd *dap;
2979 struct dirrem *dirrem, *prevdirrem;
2980 struct pagedep *pagedep;
2981 struct inodedep *inodedep;
2982
2983	offset = blkoff(dp->i_fs, dp->i_offset);
2984	dap = pool_get(&diradd_pool, PR_WAITOK | PR_ZERO);
2985	dap->da_list.wk_type = D_DIRADD;
2986	dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
2987	dap->da_offset = offset;
2988	dap->da_newinum = newinum;
2989
2990	/*
2991	 * Allocate a new dirrem and ACQUIRE_LOCK.
2992	 */
2993	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
2994	pagedep = dirrem->dm_pagedep;
2995	/*
2996	 * The possible values for isrmdir:
2997	 *	0 - non-directory file rename
2998	 *	1 - directory rename within same directory
2999	 *	inum - directory rename to new directory of given inode number
3000	 * When renaming to a new directory, we are both deleting and
3001	 * creating a new directory entry, so the link count on the new
3002	 * directory should not change. Thus we do not need the followup
3003	 * dirrem which is usually done in handle_workitem_remove. We set
3004	 * the DIRCHG flag to tell handle_workitem_remove to skip the
3005	 * followup dirrem.
3006	 */
3007	if (isrmdir > 1)
3008		dirrem->dm_state |= DIRCHG;
3009
3010	/*
3011	 * If the COMPLETE flag is clear, then there were no active
3012	 * entries and we want to roll back to the previous inode until
3013	 * the new inode is committed to disk. If the COMPLETE flag is
3014	 * set, then we have deleted an entry that never made it to disk.
3015	 * If the entry we deleted resulted from a name change, then the old
3016	 * inode reference still resides on disk. Any rollback that we do
3017	 * needs to be to that old inode (returned to us in prevdirrem). If
3018	 * the entry we deleted resulted from a create, then there is
3019	 * no entry on the disk, so we want to roll back to zero rather
3020	 * than the uncommitted inode. In either of the COMPLETE cases we
3021	 * want to immediately free the unwritten and unreferenced inode.
3022	 */
3023	if ((dirrem->dm_state & COMPLETE) == 0) {
3024		dap->da_previous = dirrem;
3025	} else {
3026		if (prevdirrem != NULL) {
3027			dap->da_previous = prevdirrem;
3028		} else {
3029			dap->da_state &= ~DIRCHG;
3030			dap->da_pagedep = pagedep;
3031		}
3032		dirrem->dm_dirinum = pagedep->pd_ino;
3033		add_to_worklist(&dirrem->dm_list);
3034	}
3035	/*
3036	 * Link into its inodedep. Put it on the id_bufwait list if the inode
3037	 * is not yet written. If it is written, do the post-inode write
3038	 * processing to put it on the id_pendinghd list.
3039	 */
3040	if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 ||
3041	    (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
3042		dap->da_state |= COMPLETE;
3043		LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
3044		WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
3045	} else {
3046		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
3047		    dap, da_pdlist);
3048		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
3049	}
3050	FREE_LOCK(&lk);
3051}
3052
3053/*
3054 * Called whenever the link count on an inode is changed.
3055 * It creates an inode dependency so that the new reference(s)
3056 * to the inode cannot be committed to disk until the updated
3057 * inode has been written.
3058 */
3059/* the inode with the increased link count */
3060/* do background work or not */
3061void
3062softdep_change_linkcnt(struct inode *ip, int nodelay)
3063{
3064 struct inodedep *inodedep;
3065 int flags;
3066
3067 /*
3068 * If requested, do not allow background work to happen.
3069 */
3070	flags = DEPALLOC;
3071	if (nodelay)
3072		flags |= NODELAY;
3073
3074	ACQUIRE_LOCK(&lk);
3075
3076	(void) inodedep_lookup(ip->i_fs, ip->i_number, flags, &inodedep);
3077	if (DIP(ip, nlink) < ip->i_effnlink) {
3078		FREE_LOCK(&lk);
3079		panic("softdep_change_linkcnt: bad delta");
3080	}
3081
3082	inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
3083
3084	FREE_LOCK(&lk);
3085}
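/*
 * Editor's note -- illustrative sketch, not part of ffs_softdep.c.
 * id_nlinkdelta records how far the on-disk link count (DIP(ip,
 * nlink)) runs ahead of the effective in-memory count i_effnlink;
 * the delta may never be negative, since the on-disk count must not
 * drop below what live references require. Example numbers are
 * invented:
 */
#include <stdio.h>

int
main(void)
{
	int disk_nlink = 3;	/* link count in the on-disk inode */
	int effnlink = 2;	/* links after pending removals apply */

	if (disk_nlink < effnlink) {
		/* softdep_change_linkcnt() panics on this condition. */
		fprintf(stderr, "bad delta\n");
		return 1;
	}
	printf("id_nlinkdelta = %d\n", disk_nlink - effnlink);	/* 1 */
	return 0;
}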
3086
3087/*
3088 * This workitem decrements the inode's link count.
3089 * If the link count reaches zero, the file is removed.
3090 */
3091STATIC void
3092handle_workitem_remove(struct dirrem *dirrem)
3093{
3094	struct proc *p = CURPROC;	/* XXX */
3095	struct inodedep *inodedep;
3096	struct vnode *vp;
3097	struct inode *ip;
3098	ufsino_t oldinum;
3099	int error;
3100
3101	if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) {
3102		softdep_error("handle_workitem_remove: vget", error);
3103		return;
3104	}
3105	ip = VTOI(vp);
3106	ACQUIRE_LOCK(&lk);
3107	if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep))
3108	    == 0) {
3109		FREE_LOCK(&lk);
3110		panic("handle_workitem_remove: lost inodedep");
3111	}
3112	/*
3113	 * Normal file deletion.
3114	 */
3115	if ((dirrem->dm_state & RMDIR) == 0) {
3116		DIP_ADD(ip, nlink, -1);
3117		ip->i_flag |= IN_CHANGE;
3118		if (DIP(ip, nlink) < ip->i_effnlink) {
3119			FREE_LOCK(&lk);
3120			panic("handle_workitem_remove: bad file delta");
3121		}
3122		inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
3123		FREE_LOCK(&lk);
3124		vput(vp);
3125		num_dirrem -= 1;
3126		WORKITEM_FREE(dirrem, D_DIRREM);
3127		return;
3128	}
3129	/*
3130	 * Directory deletion. Decrement reference count for both the
3131	 * just deleted parent directory entry and the reference for ".".
3132	 * Next truncate the directory to length zero. When the
3133	 * truncation completes, arrange to have the reference count on
3134	 * the parent decremented to account for the loss of "..".
3135	 */
3136	DIP_ADD(ip, nlink, -2);
3137	ip->i_flag |= IN_CHANGE;
3138	if (DIP(ip, nlink) < ip->i_effnlink)
3139		panic("handle_workitem_remove: bad dir delta");
3140	inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
3141	FREE_LOCK(&lk);
3142	if ((error = UFS_TRUNCATE(ip, (off_t)0, 0, p->p_ucred)) != 0)
3143		softdep_error("handle_workitem_remove: truncate", error);
3144	/*
3145	 * Rename a directory to a new parent. Since we are both deleting
3146	 * and creating a new directory entry, the link count on the new
3147	 * directory should not change. Thus we skip the followup dirrem.
3148	 */
3149	if (dirrem->dm_state & DIRCHG) {
3150		vput(vp);
3151		num_dirrem -= 1;
3152		WORKITEM_FREE(dirrem, D_DIRREM);
3153		return;
3154	}
3155	/*
3156	 * If the inodedep does not exist, then the zero'ed inode has
3157	 * been written to disk. If the allocated inode has never been
3158	 * written to disk, then the on-disk inode is zero'ed. In either
3159	 * case we can remove the file immediately.
3160	 */
3161	ACQUIRE_LOCK(&lk);
3162	dirrem->dm_state = 0;
3163	oldinum = dirrem->dm_oldinum;
3164	dirrem->dm_oldinum = dirrem->dm_dirinum;
3165	if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 ||
3166	    check_inode_unwritten(inodedep)) {
3167		FREE_LOCK(&lk);
3168		vput(vp);
3169		handle_workitem_remove(dirrem);
3170		return;
3171	}
3172	WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
3173	FREE_LOCK(&lk);
3174	ip->i_flag |= IN_CHANGE;
3175	UFS_UPDATE(VTOI(vp), 0);
3176	vput(vp);
3177}
3178
3179/*
3180 * Inode de-allocation dependencies.
3181 *
3182 * When an inode's link count is reduced to zero, it can be de-allocated. We
3183 * found it convenient to postpone de-allocation until after the inode is
3184 * written to disk with its new link count (zero). At this point, all of the
3185 * on-disk inode's block pointers are nullified and, with careful dependency
3186 * list ordering, all dependencies related to the inode will be satisfied and
3187 * the corresponding dependency structures de-allocated. So, if/when the
3188 * inode is reused, there will be no mixing of old dependencies with new
3189 * ones. This artificial dependency is set up by the block de-allocation
3190 * procedure above (softdep_setup_freeblocks) and completed by the
3191 * following procedure.
3192 */
3193STATIC void
3194handle_workitem_freefile(struct freefile *freefile)
3195{
3196 struct fs *fs;
3197 struct vnode vp;
3198 struct inode tip;
3199#ifdef DEBUG
3200 struct inodedep *idp;
3201#endif
3202 int error;
3203
3204	fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
3205#ifdef DEBUG
3206	ACQUIRE_LOCK(&lk);
3207	error = inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp);
3208	FREE_LOCK(&lk);
3209	if (error)
3210		panic("handle_workitem_freefile: inodedep survived");
3211#endif
3212	tip.i_ump = VFSTOUFS(freefile->fx_mnt);
3213	tip.i_dev = freefile->fx_devvp->v_rdev;
3214	tip.i_fs = fs;
3215	tip.i_vnode = &vp;
3216	vp.v_data = &tip;
3217
3218	if ((error = ffs_freefile(&tip, freefile->fx_oldinum,
3219	    freefile->fx_mode)) != 0) {
3220		softdep_error("handle_workitem_freefile", error);
3221	}
3222	WORKITEM_FREE(freefile, D_FREEFILE);
3223}
3224
3225/*
3226 * Disk writes.
3227 *
3228 * The dependency structures constructed above are most actively used when file
3229 * system blocks are written to disk. No constraints are placed on when a
3230 * block can be written, but unsatisfied update dependencies are made safe by
3231 * modifying (or replacing) the source memory for the duration of the disk
3232 * write. When the disk write completes, the memory block is again brought
3233 * up-to-date.
3234 *
3235 * In-core inode structure reclamation.
3236 *
3237 * Because there are a finite number of "in-core" inode structures, they are
3238 * reused regularly. By transferring all inode-related dependencies to the
3239 * in-memory inode block and indexing them separately (via "inodedep"s), we
3240 * can allow "in-core" inode structures to be reused at any time and avoid
3241 * any increase in contention.
3242 *
3243 * Called just before entering the device driver to initiate a new disk I/O.
3244 * The buffer must be locked, thus, no I/O completion operations can occur
3245 * while we are manipulating its associated dependencies.
3246 */
3247/* structure describing disk write to occur */
3248void
3249softdep_disk_io_initiation(struct buf *bp)
3250{
3251 struct worklist *wk, *nextwk;
3252 struct indirdep *indirdep;
3253 struct inodedep *inodedep;
3254 struct buf *sbp;
3255
3256 /*
3257 * We only care about write operations. There should never
3258 * be dependencies for reads.
3259 */
3260	if (bp->b_flags & B_READ)
3261		panic("softdep_disk_io_initiation: read");
3262
3263	ACQUIRE_LOCK(&lk);
3264
3265	/*
3266	 * Do any necessary pre-I/O processing.
3267	 */
3268	for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) {
3269		nextwk = LIST_NEXT(wk, wk_list);
3270		switch (wk->wk_type) {
3271
3272		case D_PAGEDEP:
3273			initiate_write_filepage(WK_PAGEDEP(wk), bp);
3274			continue;
3275
3276		case D_INODEDEP:
3277			inodedep = WK_INODEDEP(wk);
3278			if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC)
3279				initiate_write_inodeblock_ufs1(inodedep, bp);
3280#ifdef FFS2
3281			else
3282				initiate_write_inodeblock_ufs2(inodedep, bp);
3283#endif
3284			continue;
3285
3286		case D_INDIRDEP:
3287			indirdep = WK_INDIRDEP(wk);
3288			if (indirdep->ir_state & GOINGAWAY)
3289				panic("disk_io_initiation: indirdep gone");
3290			/*
3291			 * If there are no remaining dependencies, this
3292			 * will be writing the real pointers, so the
3293			 * dependency can be freed.
3294			 */
3295			if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) {
3296				sbp = indirdep->ir_savebp;
3297				sbp->b_flags |= B_INVAL | B_NOCACHE;
3298				/* inline expand WORKLIST_REMOVE(wk); */
3299				wk->wk_state &= ~ONWORKLIST;
3300				LIST_REMOVE(wk, wk_list);
3301				WORKITEM_FREE(indirdep, D_INDIRDEP);
3302				FREE_LOCK(&lk);
3303				brelse(sbp);
3304				ACQUIRE_LOCK(&lk);
3305				continue;
3306			}
3307			/*
3308			 * Replace up-to-date version with safe version.
3309			 */
3310			FREE_LOCK(&lk);
3311			indirdep->ir_saveddata = malloc(bp->b_bcount,
3312			    M_INDIRDEP, M_WAITOK);
3313			ACQUIRE_LOCK(&lk);
3314			indirdep->ir_state &= ~ATTACHED;
3315			indirdep->ir_state |= UNDONE;
3316			memcpy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
3317			memcpy(bp->b_data, indirdep->ir_savebp->b_data,
3318			    bp->b_bcount);
3319			continue;
3320
3321		case D_MKDIR:
3322		case D_BMSAFEMAP:
3323		case D_ALLOCDIRECT:
3324		case D_ALLOCINDIR:
3325			continue;
3326
3327		default:
3328			FREE_LOCK(&lk);
3329			panic("handle_disk_io_initiation: Unexpected type %s",
3330			    TYPENAME(wk->wk_type));
3331			/* NOTREACHED */
3332		}
3333	}
3334
3335	FREE_LOCK(&lk);
3336}
3337
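The D_INDIRDEP arm above is the clearest instance of the "safe copy" discipline described in the header comment: rather than delaying the write, the buffer's up-to-date contents are stashed in ir_saveddata and the last-known-safe copy from ir_savebp is written in its place, to be restored at completion time. A minimal userland sketch of that save/substitute/restore pattern follows; the struct and function names are illustrative stand-ins, not kernel API.

#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-ins for bp->b_data and the indirdep fields. */
struct safe_write {
        unsigned char *live;    /* up-to-date data (bp->b_data) */
        unsigned char *safe;    /* last safely written copy (ir_savebp) */
        unsigned char *saved;   /* stash for the live data (ir_saveddata) */
        size_t len;
};

/* Before I/O: stash the live data and write the safe copy instead. */
static int
begin_safe_write(struct safe_write *sw)
{
        if ((sw->saved = malloc(sw->len)) == NULL)
                return (-1);
        memcpy(sw->saved, sw->live, sw->len);   /* UNDONE: live data saved */
        memcpy(sw->live, sw->safe, sw->len);    /* buffer now holds safe copy */
        return (0);
}

/* After I/O completes: bring the buffer back up to date. */
static void
end_safe_write(struct safe_write *sw)
{
        memcpy(sw->live, sw->saved, sw->len);   /* ATTACHED again */
        free(sw->saved);
        sw->saved = NULL;
}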
3338/*
3339 * Called from within the procedure above to deal with unsatisfied
3340 * allocation dependencies in a directory. The buffer must be locked,
3341 * thus, no I/O completion operations can occur while we are
3342 * manipulating its associated dependencies.
3343 */
3344STATIC void
3345initiate_write_filepage(struct pagedep *pagedep, struct buf *bp)
3346{
3347        struct diradd *dap;
3348        struct direct *ep;
3349        int i;
3350
3351        if (pagedep->pd_state & IOSTARTED) {
3352                /*
3353                 * This can only happen if there is a driver that does not
3354                 * understand chaining. Here biodone will reissue the call
3355                 * to strategy for the incomplete buffers.
3356                 */
3357                printf("initiate_write_filepage: already started\n");
3358                return;
3359        }
3360        pagedep->pd_state |= IOSTARTED;
3361        for (i = 0; i < DAHASHSZ; i++) {
3362                LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
3363                        ep = (struct direct *)
3364                            ((char *)bp->b_data + dap->da_offset);
3365                        if (ep->d_ino != dap->da_newinum) {
3366                                FREE_LOCK(&lk);
3367                                panic("%s: dir inum %u != new %u",
3368                                    "initiate_write_filepage",
3369                                    ep->d_ino, dap->da_newinum);
3370                        }
3371                        if (dap->da_state & DIRCHG)
3372                                ep->d_ino = dap->da_previous->dm_oldinum;
3373                        else
3374                                ep->d_ino = 0;
3375                        dap->da_state &= ~ATTACHED;
3376                        dap->da_state |= UNDONE;
3377                }
3378        }
3379}
3380
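For directory pages the rollback is per entry rather than per buffer: each uncommitted entry is made to point back at the inode it used to name (the DIRCHG case) or made to look unallocated, so the on-disk page never names an inode that is not yet safely on disk. A toy version of that decision, with hypothetical stand-in types:

#include <stdint.h>

/* Toy stand-ins; the real struct direct and struct diradd live in
 * ufs/ufs/dir.h and the softdep headers. */
struct toy_direct { uint32_t d_ino; };
struct toy_diradd { uint32_t da_newinum, da_oldinum; int dirchg; };

/*
 * Pre-write rollback for one uncommitted entry: an overwrite (DIRCHG)
 * reverts to the previous inumber; a fresh addition is zeroed so the
 * entry is invisible if the system crashes after this page is written.
 */
static void
rollback_entry(struct toy_direct *ep, const struct toy_diradd *dap)
{
        if (dap->dirchg)
                ep->d_ino = dap->da_oldinum;
        else
                ep->d_ino = 0;
}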
3381/*
3382 * Called from within the procedure above to deal with unsatisfied
3383 * allocation dependencies in an inodeblock. The buffer must be
3384 * locked, thus, no I/O completion operations can occur while we
3385 * are manipulating its associated dependencies.
3386 */
3387/* The inode block */
3388STATIC void
3389initiate_write_inodeblock_ufs1(struct inodedep *inodedep, struct buf *bp)
3390{
3391        struct allocdirect *adp, *lastadp;
3392        struct ufs1_dinode *dp;
3393        struct fs *fs;
3394#ifdef DIAGNOSTIC
3395        daddr_t prevlbn = 0;
3396        int32_t d1, d2;
3397#endif
3398        int i, deplist;
3399
3400        if (inodedep->id_state & IOSTARTED) {
3401                FREE_LOCK(&lk);
3402                panic("initiate_write_inodeblock: already started");
3403        }
3404        inodedep->id_state |= IOSTARTED;
3405        fs = inodedep->id_fs;
3406        dp = (struct ufs1_dinode *)bp->b_data +
3407            ino_to_fsbo(fs, inodedep->id_ino);
3408        /*
3409         * If the bitmap is not yet written, then the allocated
3410         * inode cannot be written to disk.
3411         */
3412        if ((inodedep->id_state & DEPCOMPLETE) == 0) {
3413                if (inodedep->id_savedino1 != NULL) {
3414                        FREE_LOCK(&lk);
3415                        panic("initiate_write_inodeblock: already doing I/O");
3416                }
3417                FREE_LOCK(&lk);
3418                inodedep->id_savedino1 = malloc(sizeof(struct ufs1_dinode),
3419                    M_INODEDEP, M_WAITOK);
3420                inodedep->id_unsize = sizeof(struct ufs1_dinode);
3421                ACQUIRE_LOCK(&lk);
3422                *inodedep->id_savedino1 = *dp;
3423                memset(dp, 0, sizeof(struct ufs1_dinode));
3424                return;
3425        }
3426        /*
3427         * If no dependencies, then there is nothing to roll back.
3428         */
3429        inodedep->id_savedsize = dp->di_size;
3430        if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
3431                return;
3432        /*
3433         * Set the dependencies to busy.
3434         */
3435        for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
3436            adp = TAILQ_NEXT(adp, ad_next)) {
3437#ifdef DIAGNOSTIC
3438                if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3439                        FREE_LOCK(&lk);
3440                        panic("softdep_write_inodeblock: lbn order");
3441                }
3442                prevlbn = adp->ad_lbn;
3443                if (adp->ad_lbn < NDADDR &&
3444                    (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
3445                        FREE_LOCK(&lk);
3446                        panic("%s: direct pointer #%lld mismatch %d != %d",
3447                            "softdep_write_inodeblock", (long long)adp->ad_lbn,
3448                            d1, d2);
3449                }
3450                if (adp->ad_lbn >= NDADDR &&
3451                    (d1 = dp->di_ib[adp->ad_lbn - NDADDR]) !=
3452                    (d2 = adp->ad_newblkno)) {
3453                        FREE_LOCK(&lk);
3454                        panic("%s: indirect pointer #%lld mismatch %d != %d",
3455                            "softdep_write_inodeblock", (long long)(adp->ad_lbn -
3456                            NDADDR), d1, d2);
3457                }
3458                deplist |= 1 << adp->ad_lbn;
3459                if ((adp->ad_state & ATTACHED) == 0) {
3460                        FREE_LOCK(&lk);
3461                        panic("softdep_write_inodeblock: Unknown state 0x%x",
3462                            adp->ad_state);
3463                }
3464#endif /* DIAGNOSTIC */
3465                adp->ad_state &= ~ATTACHED;
3466                adp->ad_state |= UNDONE;
3467        }
3468        /*
3469         * The on-disk inode cannot claim to be any larger than the last
3470         * fragment that has been written. Otherwise, the on-disk inode
3471         * might have fragments that were not the last block in the file
3472         * which would corrupt the filesystem.
3473         */
3474        for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
3475            lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
3476                if (adp->ad_lbn >= NDADDR)
3477                        break;
3478                dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
3479                /* keep going until hitting a rollback to a frag */
3480                if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3481                        continue;
3482                dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3483                for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
3484#ifdef DIAGNOSTIC
3485                        if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
3486                                FREE_LOCK(&lk);
3487                                panic("softdep_write_inodeblock: lost dep1");
3488                        }
3489#endif /* DIAGNOSTIC */
3490                        dp->di_db[i] = 0;
3491                }
3492                for (i = 0; i < NIADDR; i++) {
3493#ifdef DIAGNOSTIC
3494                        if (dp->di_ib[i] != 0 &&
3495                            (deplist & ((1 << NDADDR) << i)) == 0) {
3496                                FREE_LOCK(&lk);
3497                                panic("softdep_write_inodeblock: lost dep2");
3498                        }
3499#endif /* DIAGNOSTIC */
3500                        dp->di_ib[i] = 0;
3501                }
3502                return;
3503        }
3504        /*
3505         * If we have zero'ed out the last allocated block of the file,
3506         * roll back the size to the last currently allocated block.
3507         * We know that this last allocated block is full-sized as
3508         * we already checked for fragments in the loop above.
3509         */
3510        if (lastadp != NULL &&
3511            dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3512                for (i = lastadp->ad_lbn; i >= 0; i--)
3513                        if (dp->di_db[i] != 0)
3514                                break;
3515                dp->di_size = (i + 1) * fs->fs_bsize;
3516        }
3517        /*
3518         * The only dependencies are for indirect blocks.
3519         *
3520         * The file size for indirect block additions is not guaranteed.
3521         * Such a guarantee would be non-trivial to achieve. The conventional
3522         * synchronous write implementation also does not make this guarantee.
3523         * Fsck should catch and fix discrepancies. Arguably, the file size
3524         * can be over-estimated without destroying integrity when the file
3525         * moves into the indirect blocks (i.e., is large). If we want to
3526         * postpone fsck, we are stuck with this argument.
3527         */
3528        for (; adp; adp = TAILQ_NEXT(adp, ad_next))
3529                dp->di_ib[adp->ad_lbn - NDADDR] = 0;
3530}
3531
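The DIAGNOSTIC code above packs the pending-dependency set into the int deplist: bit lbn for each of the NDADDR direct pointers, and bit NDADDR + i for indirect pointer i. The size rollback likewise assumes every block before the reverted fragment is full-sized, so the largest size the on-disk inode may claim is bsize * lbn + oldsize. A small self-checking sketch of both calculations; the constants mirror FFS but the names are local to the example:

#include <assert.h>
#include <stdint.h>

#define TOY_NDADDR      12      /* direct pointers, as in FFS */
#define TOY_NIADDR      3       /* indirect pointers */

int
main(void)
{
        uint32_t deplist = 0;
        long bsize = 16384, lbn = 4, oldsize = 2048;

        /* One bit per pointer with a pending dependency: bit lbn for a
         * direct pointer, bit TOY_NDADDR + i for indirect pointer i. */
        deplist |= 1 << 4;                      /* direct block 4 */
        deplist |= (1 << TOY_NDADDR) << 1;      /* double indirect */

        assert(deplist & (1 << 4));
        assert(deplist & (1 << (TOY_NDADDR + 1)));

        /* Rolled-back size when block `lbn` reverts to an `oldsize`
         * fragment: lbn full blocks, then the fragment. */
        assert(bsize * lbn + oldsize == 67584);
        return 0;
}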
3532#ifdef FFS2
3533/*
3534 * Version of initiate_write_inodeblock that handles FFS2 dinodes.
3535 */
3536/* The inode block */
3537STATIC void
3538initiate_write_inodeblock_ufs2(struct inodedep *inodedep, struct buf *bp)
3539{
3540        struct allocdirect *adp, *lastadp;
3541        struct ufs2_dinode *dp;
3542        struct fs *fs = inodedep->id_fs;
3543#ifdef DIAGNOSTIC
3544        daddr_t prevlbn = -1, d1, d2;
3545#endif
3546        int deplist, i;
3547
3548        if (inodedep->id_state & IOSTARTED)
3549                panic("initiate_write_inodeblock_ufs2: already started");
3550        inodedep->id_state |= IOSTARTED;
3551        fs = inodedep->id_fs;
3552        dp = (struct ufs2_dinode *)bp->b_data +
3553            ino_to_fsbo(fs, inodedep->id_ino);
3554        /*
3555         * If the bitmap is not yet written, then the allocated
3556         * inode cannot be written to disk.
3557         */
3558        if ((inodedep->id_state & DEPCOMPLETE) == 0) {
3559                if (inodedep->id_savedino2 != NULL)
3560                        panic("initiate_write_inodeblock_ufs2: I/O underway");
3561                inodedep->id_savedino2 = malloc(sizeof(struct ufs2_dinode),
3562                    M_INODEDEP, M_WAITOK);
3563                inodedep->id_unsize = sizeof(struct ufs2_dinode);
3564                *inodedep->id_savedino2 = *dp;
3565                memset(dp, 0, sizeof(struct ufs2_dinode));
3566                return;
3567        }
3568        /*
3569         * If no dependencies, then there is nothing to roll back.
3570         */
3571        inodedep->id_savedsize = dp->di_size;
3572        if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
3573                return;
3574
3575#ifdef notyet
3576        inodedep->id_savedextsize = dp->di_extsize;
3577        if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL &&
3578            TAILQ_FIRST(&inodedep->id_extupdt) == NULL)
3579                return;
3580        /*
3581         * Set the ext data dependencies to busy.
3582         */
3583        for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
3584            adp = TAILQ_NEXT(adp, ad_next)) {
3585#ifdef DIAGNOSTIC
3586                if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3587                        FREE_LOCK(&lk);
3588                        panic("softdep_write_inodeblock: lbn order");
3589                }
3590                prevlbn = adp->ad_lbn;
3591                if ((d1 = dp->di_extb[adp->ad_lbn]) !=
3592                    (d2 = adp->ad_newblkno)) {
3593                        FREE_LOCK(&lk);
3594                        panic("%s: direct pointer #%lld mismatch %lld != %lld",
3595                            "softdep_write_inodeblock", (long long)adp->ad_lbn,
3596                            d1, d2);
3597                }
3598                deplist |= 1 << adp->ad_lbn;
3599                if ((adp->ad_state & ATTACHED) == 0) {
3600                        FREE_LOCK(&lk);
3601                        panic("softdep_write_inodeblock: Unknown state 0x%x",
3602                            adp->ad_state);
3603                }
3604#endif /* DIAGNOSTIC */
3605                adp->ad_state &= ~ATTACHED;
3606                adp->ad_state |= UNDONE;
3607        }
3608        /*
3609         * The on-disk inode cannot claim to be any larger than the last
3610         * fragment that has been written. Otherwise, the on-disk inode
3611         * might have fragments that were not the last block in the ext
3612         * data which would corrupt the filesystem.
3613         */
3614        for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
3615            lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
3616                dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno;
3617                /* keep going until hitting a rollback to a frag */
3618                if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3619                        continue;
3620                dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3621                for (i = adp->ad_lbn + 1; i < NXADDR; i++) {
3622#ifdef DIAGNOSTIC
3623                        if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) {
3624                                FREE_LOCK(&lk);
3625                                panic("softdep_write_inodeblock: lost dep1");
3626                        }
3627#endif /* DIAGNOSTIC */
3628                        dp->di_extb[i] = 0;
3629                }
3630                lastadp = NULL;
3631                break;
3632        }
3633        /*
3634         * If we have zero'ed out the last allocated block of the ext
3635         * data, roll back the size to the last currently allocated block.
3636         * We know that this last allocated block is full-sized as
3637         * we already checked for fragments in the loop above.
3638         */
3639        if (lastadp != NULL &&
3640            dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3641                for (i = lastadp->ad_lbn; i >= 0; i--)
3642                        if (dp->di_extb[i] != 0)
3643                                break;
3644                dp->di_extsize = (i + 1) * fs->fs_bsize;
3645        }
3646#endif /* notyet */
3647
3648        /*
3649         * Set the file data dependencies to busy.
3650         */
3651        for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
3652            adp = TAILQ_NEXT(adp, ad_next)) {
3653#ifdef DIAGNOSTIC
3654                if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3655                        FREE_LOCK(&lk);
3656                        panic("softdep_write_inodeblock: lbn order");
3657                }
3658                prevlbn = adp->ad_lbn;
3659                if (adp->ad_lbn < NDADDR &&
3660                    (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
3661                        FREE_LOCK(&lk);
3662                        panic("%s: direct pointer #%lld mismatch %lld != %lld",
3663                            "softdep_write_inodeblock", (long long)adp->ad_lbn,
3664                            d1, d2);
3665                }
3666                if (adp->ad_lbn >= NDADDR &&
3667                    (d1 = dp->di_ib[adp->ad_lbn - NDADDR]) !=
3668                    (d2 = adp->ad_newblkno)) {
3669                        FREE_LOCK(&lk);
3670                        panic("%s: indirect pointer #%lld mismatch %lld != %lld",
3671                            "softdep_write_inodeblock", (long long)(adp->ad_lbn -
3672                            NDADDR), d1, d2);
3673                }
3674                deplist |= 1 << adp->ad_lbn;
3675                if ((adp->ad_state & ATTACHED) == 0) {
3676                        FREE_LOCK(&lk);
3677                        panic("softdep_write_inodeblock: Unknown state 0x%x",
3678                            adp->ad_state);
3679                }
3680#endif /* DIAGNOSTIC */
3681                adp->ad_state &= ~ATTACHED;
3682                adp->ad_state |= UNDONE;
3683        }
3684        /*
3685         * The on-disk inode cannot claim to be any larger than the last
3686         * fragment that has been written. Otherwise, the on-disk inode
3687         * might have fragments that were not the last block in the file
3688         * which would corrupt the filesystem.
3689         */
3690        for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
3691            lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
3692                if (adp->ad_lbn >= NDADDR)
3693                        break;
3694                dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
3695                /* keep going until hitting a rollback to a frag */
3696                if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3697                        continue;
3698                dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3699                for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
3700#ifdef DIAGNOSTIC
3701                        if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
3702                                FREE_LOCK(&lk);
3703                                panic("softdep_write_inodeblock: lost dep2");
3704                        }
3705#endif /* DIAGNOSTIC */
3706                        dp->di_db[i] = 0;
3707                }
3708                for (i = 0; i < NIADDR; i++) {
3709#ifdef DIAGNOSTIC
3710                        if (dp->di_ib[i] != 0 &&
3711                            (deplist & ((1 << NDADDR) << i)) == 0) {
3712                                FREE_LOCK(&lk);
3713                                panic("softdep_write_inodeblock: lost dep3");
3714                        }
3715#endif /* DIAGNOSTIC */
3716                        dp->di_ib[i] = 0;
3717                }
3718                return;
3719        }
3720        /*
3721         * If we have zero'ed out the last allocated block of the file,
3722         * roll back the size to the last currently allocated block.
3723         * We know that this last allocated block is full-sized as
3724         * we already checked for fragments in the loop above.
3725         */
3726        if (lastadp != NULL &&
3727            dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3728                for (i = lastadp->ad_lbn; i >= 0; i--)
3729                        if (dp->di_db[i] != 0)
3730                                break;
3731                dp->di_size = (i + 1) * fs->fs_bsize;
3732        }
3733        /*
3734         * The only dependencies are for indirect blocks.
3735         *
3736         * The file size for indirect block additions is not guaranteed.
3737         * Such a guarantee would be non-trivial to achieve. The conventional
3738         * synchronous write implementation also does not make this guarantee.
3739         * Fsck should catch and fix discrepancies. Arguably, the file size
3740         * can be over-estimated without destroying integrity when the file
3741         * moves into the indirect blocks (i.e., is large). If we want to
3742         * postpone fsck, we are stuck with this argument.
3743         */
3744        for (; adp; adp = TAILQ_NEXT(adp, ad_next))
3745                dp->di_ib[adp->ad_lbn - NDADDR] = 0;
3746}
3747#endif /* FFS2 */
3748
3749/*
3750 * This routine is called during the completion interrupt
3751 * service routine for a disk write (from the procedure called
3752 * by the device driver to inform the file system caches of
3753 * a request completion). It should be called early in this
3754 * procedure, before the block is made available to other
3755 * processes or other routines are called.
3756 */
3757/* describes the completed disk write */
3758void
3759softdep_disk_write_complete(struct buf *bp)
3760{
3761        struct worklist *wk;
3762        struct workhead reattach;
3763        struct newblk *newblk;
3764        struct allocindir *aip;
3765        struct allocdirect *adp;
3766        struct indirdep *indirdep;
3767        struct inodedep *inodedep;
3768        struct bmsafemap *bmsafemap;
3769
3770        /*
3771         * If an error occurred while doing the write, then the data
3772         * has not hit the disk and the dependencies cannot be unrolled.
3773         */
3774        if ((bp->b_flags & B_ERROR) && !(bp->b_flags & B_INVAL))
3775                return;
3776
3777#ifdef DEBUG
3778        if (lk.lkt_held != -1)
3779                panic("softdep_disk_write_complete: lock is held");
3780        lk.lkt_held = -2;
3781#endif
3782        LIST_INIT(&reattach);
3783        while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
3784                WORKLIST_REMOVE(wk);
3785                switch (wk->wk_type) {
3786
3787                case D_PAGEDEP:
3788                        if (handle_written_filepage(WK_PAGEDEP(wk), bp))
3789                                WORKLIST_INSERT(&reattach, wk);
3790                        continue;
3791
3792                case D_INODEDEP:
3793                        if (handle_written_inodeblock(WK_INODEDEP(wk), bp))
3794                                WORKLIST_INSERT(&reattach, wk);
3795                        continue;
3796
3797                case D_BMSAFEMAP:
3798                        bmsafemap = WK_BMSAFEMAP(wk);
3799                        while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) {
3800                                newblk->nb_state |= DEPCOMPLETE;
3801                                newblk->nb_bmsafemap = NULL;
3802                                LIST_REMOVE(newblk, nb_deps);
3803                        }
3804                        while ((adp =
3805                            LIST_FIRST(&bmsafemap->sm_allocdirecthd))) {
3806                                adp->ad_state |= DEPCOMPLETE;
3807                                adp->ad_buf = NULL;
3808                                LIST_REMOVE(adp, ad_deps);
3809                                handle_allocdirect_partdone(adp);
3810                        }
3811                        while ((aip =
3812                            LIST_FIRST(&bmsafemap->sm_allocindirhd))) {
3813                                aip->ai_state |= DEPCOMPLETE;
3814                                aip->ai_buf = NULL;
3815                                LIST_REMOVE(aip, ai_deps);
3816                                handle_allocindir_partdone(aip);
3817                        }
3818                        while ((inodedep =
3819                            LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) {
3820                                inodedep->id_state |= DEPCOMPLETE;
3821                                LIST_REMOVE(inodedep, id_deps);
3822                                inodedep->id_buf = NULL;
3823                        }
3824                        WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
3825                        continue;
3826
3827                case D_MKDIR:
3828                        handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY);
3829                        continue;
3830
3831                case D_ALLOCDIRECT:
3832                        adp = WK_ALLOCDIRECT(wk);
3833                        adp->ad_state |= COMPLETE;
3834                        handle_allocdirect_partdone(adp);
3835                        continue;
3836
3837                case D_ALLOCINDIR:
3838                        aip = WK_ALLOCINDIR(wk);
3839                        aip->ai_state |= COMPLETE;
3840                        handle_allocindir_partdone(aip);
3841                        continue;
3842
3843                case D_INDIRDEP:
3844                        indirdep = WK_INDIRDEP(wk);
3845                        if (indirdep->ir_state & GOINGAWAY)
3846                                panic("disk_write_complete: indirdep gone");
3847                        memcpy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
3848                        free(indirdep->ir_saveddata, M_INDIRDEP, bp->b_bcount);
3849                        indirdep->ir_saveddata = NULL;
3850                        indirdep->ir_state &= ~UNDONE;
3851                        indirdep->ir_state |= ATTACHED;
3852                        while ((aip = LIST_FIRST(&indirdep->ir_donehd))) {
3853                                handle_allocindir_partdone(aip);
3854                                if (aip == LIST_FIRST(&indirdep->ir_donehd))
3855                                        panic("disk_write_complete: not gone");
3856                        }
3857                        WORKLIST_INSERT(&reattach, wk);
3858                        if ((bp->b_flags & B_DELWRI) == 0)
3859                                stat_indir_blk_ptrs++;
3860                        buf_dirty(bp);
3861                        continue;
3862
3863                default:
3864                        panic("handle_disk_write_complete: Unknown type %s",
3865                            TYPENAME(wk->wk_type));
3866                        /* NOTREACHED */
3867                }
3868        }
3869        /*
3870         * Reattach any requests that must be redone.
3871         */
3872        while ((wk = LIST_FIRST(&reattach)) != NULL) {
3873                WORKLIST_REMOVE(wk);
3874                WORKLIST_INSERT(&bp->b_dep, wk);
3875        }
3876#ifdef DEBUG
3877        if (lk.lkt_held != -2)
3878                panic("softdep_disk_write_complete: lock lost");
3879        lk.lkt_held = -1;
3880#endif
3881}
3882
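softdep_disk_write_complete() drains b_dep completely, letting each handler decide (via its return value or an explicit WORKLIST_INSERT) whether its item must be reattached for another pass over the buffer. The generic shape of that drain-and-reattach loop, sketched with a plain singly linked list rather than the kernel's queue macros:

#include <stddef.h>

struct item {
        struct item *next;
        int (*done)(struct item *);     /* returns nonzero to reattach */
};

/*
 * Every item is removed before its handler runs, and handlers that
 * still have unfinished work are queued again once the scan is over,
 * so the scan itself always terminates.
 */
static void
process_completions(struct item **head)
{
        struct item *it, *reattach = NULL;

        while ((it = *head) != NULL) {
                *head = it->next;
                if (it->done(it)) {             /* more work pending */
                        it->next = reattach;
                        reattach = it;
                }
        }
        while ((it = reattach) != NULL) {       /* splice survivors back */
                reattach = it->next;
                it->next = *head;
                *head = it;
        }
}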
3883/*
3884 * Called from within softdep_disk_write_complete above. Note that
3885 * this routine is always called from interrupt level with further
3886 * splbio interrupts blocked.
3887 */
3888/* the completed allocdirect */
3889STATIC void
3890handle_allocdirect_partdone(struct allocdirect *adp)
3891{
3892        struct allocdirect *listadp;
3893        struct inodedep *inodedep;
3894        long bsize, delay;
3895
3896        splassert(IPL_BIO);
3897
3898        if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
3899                return;
3900        if (adp->ad_buf != NULL)
3901                panic("handle_allocdirect_partdone: dangling dep");
3902
3903        /*
3904         * The on-disk inode cannot claim to be any larger than the last
3905         * fragment that has been written. Otherwise, the on-disk inode
3906         * might have fragments that were not the last block in the file
3907         * which would corrupt the filesystem. Thus, we cannot free any
3908         * allocdirects after one whose ad_oldblkno claims a fragment as
3909         * these blocks must be rolled back to zero before writing the inode.
3910         * We check the currently active set of allocdirects in id_inoupdt.
3911         */
3912        inodedep = adp->ad_inodedep;
3913        bsize = inodedep->id_fs->fs_bsize;
3914        TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
3915                /* found our block */
3916                if (listadp == adp)
3917                        break;
3918                /* continue if ad_oldlbn is not a fragment */
3919                if (listadp->ad_oldsize == 0 ||
3920                    listadp->ad_oldsize == bsize)
3921                        continue;
3922                /* hit a fragment */
3923                return;
3924        }
3925        /*
3926         * If we have reached the end of the current list without
3927         * finding the just finished dependency, then it must be
3928         * on the future dependency list. Future dependencies cannot
3929         * be freed until they are moved to the current list.
3930         */
3931        if (listadp == NULL) {
3932#ifdef DEBUG
3933                TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
3934                        /* found our block */
3935                        if (listadp == adp)
3936                                break;
3937                if (listadp == NULL)
3938                        panic("handle_allocdirect_partdone: lost dep");
3939#endif /* DEBUG */
3940                return;
3941        }
3942        /*
3943         * If we have found the just finished dependency, then free
3944         * it along with anything that follows it that is complete.
3945         * If the inode still has a bitmap dependency, then it has
3946         * never been written to disk, hence the on-disk inode cannot
3947         * reference the old fragment so we can free it without delay.
3948         */
3949        delay = (inodedep->id_state & DEPCOMPLETE);
3950        for (; adp; adp = listadp) {
3951                listadp = TAILQ_NEXT(adp, ad_next);
3952                if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
3953                        return;
3954                free_allocdirect(&inodedep->id_inoupdt, adp, delay);
3955        }
3956}
3957
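The first scan above enforces the invariant spelled out in the comment: nothing may be freed past an allocdirect whose old size is a fragment, because such blocks still have to be rolled back in the inode. Reduced to arrays, the scan looks like this; a sketch of the shape, not the kernel code:

#include <stdbool.h>

/*
 * Freeing may proceed only if no entry ahead of `target` rolls back to
 * a fragment, i.e. a size that is neither 0 nor a full block.
 */
static bool
may_free(const long *oldsizes, int target, long bsize)
{
        for (int i = 0; i < target; i++)
                if (oldsizes[i] != 0 && oldsizes[i] != bsize)
                        return false;   /* hit a fragment */
        return true;
}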
3958/*
3959 * Called from within softdep_disk_write_complete above. Note that
3960 * this routine is always called from interrupt level with further
3961 * splbio interrupts blocked.
3962 */
3963/* the completed allocindir */
3964STATIC void
3965handle_allocindir_partdone(struct allocindir *aip)
3966{
3967        struct indirdep *indirdep;
3968
3969        splassert(IPL_BIO);
3970
3971        if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE)
3972                return;
3973        if (aip->ai_buf != NULL)
3974                panic("handle_allocindir_partdone: dangling dependency");
3975        indirdep = aip->ai_indirdep;
3976        if (indirdep->ir_state & UNDONE) {
3977                LIST_REMOVE(aip, ai_next);
3978                LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
3979                return;
3980        }
3981        if (indirdep->ir_state & UFS1FMT)
3982                ((int32_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
3983                    aip->ai_newblkno;
3984        else
3985                ((int64_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
3986                    aip->ai_newblkno;
3987        LIST_REMOVE(aip, ai_next);
3988        if (aip->ai_freefrag != NULL)
3989                add_to_worklist(&aip->ai_freefrag->ff_list);
3990        WORKITEM_FREE(aip, D_ALLOCINDIR);
3991}
3992
3993/*
3994 * Called from within softdep_disk_write_complete above to restore
3995 * in-memory inode block contents to their most up-to-date state. Note
3996 * that this routine is always called from interrupt level with further
3997 * splbio interrupts blocked.
3998 */
3999/* buffer containing the inode block */
4000STATIC int
4001handle_written_inodeblock(struct inodedep *inodedep, struct buf *bp)
4002{
4003        struct worklist *wk, *filefree;
4004        struct allocdirect *adp, *nextadp;
4005        struct ufs1_dinode *dp1 = NULL;
4006        struct ufs2_dinode *dp2 = NULL;
4007        int hadchanges, fstype;
4008
4009        splassert(IPL_BIO);
4010
4011        if ((inodedep->id_state & IOSTARTED) == 0)
4012                panic("handle_written_inodeblock: not started");
4013        inodedep->id_state &= ~IOSTARTED;
4014
4015        if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) {
4016                fstype = UM_UFS1;
4017                dp1 = (struct ufs1_dinode *) bp->b_data +
4018                    ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
4019        } else {
4020                fstype = UM_UFS2;
4021                dp2 = (struct ufs2_dinode *) bp->b_data +
4022                    ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
4023        }
4024
4025        /*
4026         * If we had to rollback the inode allocation because of
4027         * bitmaps being incomplete, then simply restore it.
4028         * Keep the block dirty so that it will not be reclaimed until
4029         * all associated dependencies have been cleared and the
4030         * corresponding updates written to disk.
4031         */
4032        if (inodedep->id_savedino1 != NULL) {
4033                if (fstype == UM_UFS1)
4034                        *dp1 = *inodedep->id_savedino1;
4035                else
4036                        *dp2 = *inodedep->id_savedino2;
4037                free(inodedep->id_savedino1, M_INODEDEP, inodedep->id_unsize);
4038                inodedep->id_savedino1 = NULL;
4039                if ((bp->b_flags & B_DELWRI) == 0)
4040                        stat_inode_bitmap++;
4041                buf_dirty(bp);
4042                return (1);
4043        }
4044        inodedep->id_state |= COMPLETE;
4045        /*
4046         * Roll forward anything that had to be rolled back before
4047         * the inode could be updated.
4048         */
4049        hadchanges = 0;
4050        for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) {
4051                nextadp = TAILQ_NEXT(adp, ad_next);
4052                if (adp->ad_state & ATTACHED)
4053                        panic("handle_written_inodeblock: new entry");
4054                if (fstype == UM_UFS1) {
4055                        if (adp->ad_lbn < NDADDR) {
4056                                if (dp1->di_db[adp->ad_lbn] != adp->ad_oldblkno)
4057                                        panic("%s: %s #%lld mismatch %d != "
4058                                            "%lld",
4059                                            "handle_written_inodeblock",
4060                                            "direct pointer",
4061                                            (long long)adp->ad_lbn,
4062                                            dp1->di_db[adp->ad_lbn],
4063                                            (long long)adp->ad_oldblkno);
4064                                dp1->di_db[adp->ad_lbn] = adp->ad_newblkno;
4065                        } else {
4066                                if (dp1->di_ib[adp->ad_lbn - NDADDR] != 0)
4067                                        panic("%s: %s #%lld allocated as %d",
4068                                            "handle_written_inodeblock",
4069                                            "indirect pointer",
4070                                            (long long)(adp->ad_lbn - NDADDR),
4071                                            dp1->di_ib[adp->ad_lbn - NDADDR]);
4072                                dp1->di_ib[adp->ad_lbn - NDADDR] =
4073                                    adp->ad_newblkno;
4074                        }
4075                } else {
4076                        if (adp->ad_lbn < NDADDR) {
4077                                if (dp2->di_db[adp->ad_lbn] != adp->ad_oldblkno)
4078                                        panic("%s: %s #%lld mismatch %lld != "
4079                                            "%lld", "handle_written_inodeblock",
4080                                            "direct pointer",
4081                                            (long long)adp->ad_lbn,
4082                                            dp2->di_db[adp->ad_lbn],
4083                                            (long long)adp->ad_oldblkno);
4084                                dp2->di_db[adp->ad_lbn] = adp->ad_newblkno;
4085                        } else {
4086                                if (dp2->di_ib[adp->ad_lbn - NDADDR] != 0)
4087                                        panic("%s: %s #%lld allocated as %lld",
4088                                            "handle_written_inodeblock",
4089                                            "indirect pointer",
4090                                            (long long)(adp->ad_lbn - NDADDR),
4091                                            dp2->di_ib[adp->ad_lbn - NDADDR]);
4092                                dp2->di_ib[adp->ad_lbn - NDADDR] =
4093                                    adp->ad_newblkno;
4094                        }
4095                }
4096                adp->ad_state &= ~UNDONE;
4097                adp->ad_state |= ATTACHED;
4098                hadchanges = 1;
4099        }
4100        if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
4101                stat_direct_blk_ptrs++;
4102        /*
4103         * Reset the file size to its most up-to-date value.
4104         */
4105        if (inodedep->id_savedsize == -1)
4106                panic("handle_written_inodeblock: bad size");
4107
4108        if (fstype == UM_UFS1) {
4109                if (dp1->di_size != inodedep->id_savedsize) {
4110                        dp1->di_size = inodedep->id_savedsize;
4111                        hadchanges = 1;
4112                }
4113        } else {
4114                if (dp2->di_size != inodedep->id_savedsize) {
4115                        dp2->di_size = inodedep->id_savedsize;
4116                        hadchanges = 1;
4117                }
4118        }
4119        inodedep->id_savedsize = -1;
4120        /*
4121         * If there were any rollbacks in the inode block, then it must be
4122         * marked dirty so that it will eventually get written back in
4123         * its correct form.
4124         */
4125        if (hadchanges)
4126                buf_dirty(bp);
4127        /*
4128         * Process any allocdirects that completed during the update.
4129         */
4130        if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
4131                handle_allocdirect_partdone(adp);
4132        /*
4133         * Process deallocations that were held pending until the
4134         * inode had been written to disk. Freeing of the inode
4135         * is delayed until after all blocks have been freed to
4136         * avoid creation of new <vfsid, inum, lbn> triples
4137         * before the old ones have been deleted.
4138         */
4139        filefree = NULL;
4140        while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) {
4141                WORKLIST_REMOVE(wk);
4142                switch (wk->wk_type) {
4143
4144                case D_FREEFILE:
4145                        /*
4146                         * We defer adding filefree to the worklist until
4147                         * all other additions have been made to ensure
4148                         * that it will be done after all the old blocks
4149                         * have been freed.
4150                         */
4151                        if (filefree != NULL)
4152                                panic("handle_written_inodeblock: filefree");
4153                        filefree = wk;
4154                        continue;
4155
4156                case D_MKDIR:
4157                        handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT);
4158                        continue;
4159
4160                case D_DIRADD:
4161                        diradd_inode_written(WK_DIRADD(wk), inodedep);
4162                        continue;
4163
4164                case D_FREEBLKS:
4165                        wk->wk_state |= COMPLETE;
4166                        if ((wk->wk_state & ALLCOMPLETE) != ALLCOMPLETE)
4167                                continue;
4168                        /* FALLTHROUGH */
4169                case D_FREEFRAG:
4170                case D_DIRREM:
4171                        add_to_worklist(wk);
4172                        continue;
4173
4174                case D_NEWDIRBLK:
4175                        free_newdirblk(WK_NEWDIRBLK(wk));
4176                        continue;
4177
4178                default:
4179                        panic("handle_written_inodeblock: Unknown type %s",
4180                            TYPENAME(wk->wk_type));
4181                        /* NOTREACHED */
4182                }
4183        }
4184        if (filefree != NULL) {
4185                if (free_inodedep(inodedep) == 0)
4186                        panic("handle_written_inodeblock: live inodedep");
4187                add_to_worklist(filefree);
4188                return (0);
4189        }
4190
4191        /*
4192         * If no outstanding dependencies, free it.
4193         */
4194        if (free_inodedep(inodedep) ||
4195            TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
4196                return (0);
4197        return (hadchanges);
4198}
4199
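The D_FREEFILE deferral above is worth isolating: every other item is queued as it is encountered, but the (at most one) freefile is queued last so the inode is reclaimed only after its old blocks are gone. A compact model of that ordering, with toy types standing in for the worklist machinery:

#include <assert.h>
#include <stddef.h>

enum toy_type { TOY_FREEBLKS, TOY_FREEFILE };
struct toy_wk { enum toy_type type; struct toy_wk *next; };

/* add_to_worklist() stand-in: just counts; order is what matters here. */
static int nqueued;
static void toy_queue(struct toy_wk *wk) { (void)wk; nqueued++; }

/*
 * Block deallocations are queued as they are seen, but the single
 * freefile is held back and queued after everything else.
 */
static void
drain_bufwait(struct toy_wk *head)
{
        struct toy_wk *filefree = NULL;

        for (; head != NULL; head = head->next) {
                if (head->type == TOY_FREEFILE) {
                        assert(filefree == NULL);       /* at most one */
                        filefree = head;
                        continue;
                }
                toy_queue(head);
        }
        if (filefree != NULL)
                toy_queue(filefree);
}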
4200/*
4201 * Process a diradd entry after its dependent inode has been written.
4202 * This routine must be called with splbio interrupts blocked.
4203 */
4204STATIC void
4205diradd_inode_written(struct diradd *dap, struct inodedep *inodedep)
4206{
4207        struct pagedep *pagedep;
4208
4209        splassert(IPL_BIO);
4210
4211        dap->da_state |= COMPLETE;
4212        if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
4213                if (dap->da_state & DIRCHG)
4214                        pagedep = dap->da_previous->dm_pagedep;
4215                else
4216                        pagedep = dap->da_pagedep;
4217                LIST_REMOVE(dap, da_pdlist);
4218                LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
4219        }
4220        WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
4221}
4222
4223/*
4224 * Handle the completion of a mkdir dependency.
4225 */
4226STATIC void
4227handle_written_mkdir(struct mkdir *mkdir, int type)
4228{
4229        struct diradd *dap;
4230        struct pagedep *pagedep;
4231
4232        splassert(IPL_BIO);
4233
4234        if (mkdir->md_state != type)
4235                panic("handle_written_mkdir: bad type");
4236        dap = mkdir->md_diradd;
4237        dap->da_state &= ~type;
4238        if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0)
4239                dap->da_state |= DEPCOMPLETE;
4240        if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
4241                if (dap->da_state & DIRCHG)
4242                        pagedep = dap->da_previous->dm_pagedep;
4243                else
4244                        pagedep = dap->da_pagedep;
4245                LIST_REMOVE(dap, da_pdlist);
4246                LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
4247        }
4248        LIST_REMOVE(mkdir, md_mkdirs);
4249        WORKITEM_FREE(mkdir, D_MKDIR);
4250}
4251
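A mkdir diradd waits on two separate writes: the ".." entry reaching the parent (MKDIR_PARENT) and the new directory block itself (MKDIR_BODY). Only when both bits have been cleared does the dependency become DEPCOMPLETE. The state transition in miniature; bit values copied from this file, names prefixed to mark them as stand-ins:

#include <assert.h>

#define TOY_MKDIR_PARENT        0x0010  /* parent's ".." write pending */
#define TOY_MKDIR_BODY          0x0020  /* new dir block write pending */
#define TOY_DEPCOMPLETE         0x0008

/*
 * handle_written_mkdir() in miniature: each completed write clears its
 * bit, and only when both are gone does the diradd become DEPCOMPLETE.
 */
static int
mkdir_written(int state, int type)
{
        state &= ~type;
        if ((state & (TOY_MKDIR_PARENT | TOY_MKDIR_BODY)) == 0)
                state |= TOY_DEPCOMPLETE;
        return state;
}

int
main(void)
{
        int st = TOY_MKDIR_PARENT | TOY_MKDIR_BODY;

        st = mkdir_written(st, TOY_MKDIR_BODY);
        assert((st & TOY_DEPCOMPLETE) == 0);    /* parent still pending */
        st = mkdir_written(st, TOY_MKDIR_PARENT);
        assert(st & TOY_DEPCOMPLETE);           /* both writes done */
        return 0;
}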
4252/*
4253 * Called from within softdep_disk_write_complete above.
4254 * A write operation was just completed. Removed inodes can
4255 * now be freed and associated block pointers may be committed.
4256 * Note that this routine is always called from interrupt level
4257 * with further splbio interrupts blocked.
4258 */
4259/* buffer containing the written page */
4260STATIC int
4261handle_written_filepage(struct pagedep *pagedep, struct buf *bp)
4262{
4263        struct dirrem *dirrem;
4264        struct diradd *dap, *nextdap;
4265        struct direct *ep;
4266        int i, chgs;
4267
4268        splassert(IPL_BIO);
4269
4270        if ((pagedep->pd_state & IOSTARTED) == 0)
4271                panic("handle_written_filepage: not started");
4272        pagedep->pd_state &= ~IOSTARTED;
4273        /*
4274         * Process any directory removals that have been committed.
4275         */
4276        while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) {
4277                LIST_REMOVE(dirrem, dm_next);
4278                dirrem->dm_dirinum = pagedep->pd_ino;
4279                add_to_worklist(&dirrem->dm_list);
4280        }
4281        /*
4282         * Free any directory additions that have been committed.
4283         * If it is a newly allocated block, we have to wait until
4284         * the on-disk directory inode claims the new block.
4285         */
4286        if ((pagedep->pd_state & NEWBLOCK) == 0)
4287                while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
4288                        free_diradd(dap);
4289        /*
4290         * Uncommitted directory entries must be restored.
4291         */
4292        for (chgs = 0, i = 0; i < DAHASHSZ; i++) {
4293                for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap;
4294                    dap = nextdap) {
4295                        nextdap = LIST_NEXT(dap, da_pdlist);
4296                        if (dap->da_state & ATTACHED)
4297                                panic("handle_written_filepage: attached");
4298                        ep = (struct direct *)
4299                            ((char *)bp->b_data + dap->da_offset);
4300                        ep->d_ino = dap->da_newinum;
4301                        dap->da_state &= ~UNDONE;
4302                        dap->da_state |= ATTACHED;
4303                        chgs = 1;
4304                        /*
4305                         * If the inode referenced by the directory has
4306                         * been written out, then the dependency can be
4307                         * moved to the pending list.
4308                         */
4309                        if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
4310                                LIST_REMOVE(dap, da_pdlist);
4311                                LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
4312                                    da_pdlist);
4313                        }
4314                }
4315        }
4316        /*
4317         * If there were any rollbacks in the directory, then it must be
4318         * marked dirty so that it will eventually get written back in
4319         * its correct form.
4320         */
4321        if (chgs) {
4322                if ((bp->b_flags & B_DELWRI) == 0)
4323                        stat_dir_entry++;
4324                buf_dirty(bp);
4325                return (1);
4326        }
4327        /*
4328         * If we are not waiting for a new directory block to be
4329         * claimed by its inode, then the pagedep will be freed.
4330         * Otherwise it will remain to track any new entries on
4331         * the page in case they are fsync'ed.
4332         */
4333        if ((pagedep->pd_state & NEWBLOCK) == 0) {
4334                LIST_REMOVE(pagedep, pd_hash);
4335                WORKITEM_FREE(pagedep, D_PAGEDEP);
4336        }
4337        return (0);
4338}
4339
4340/*
4341 * Writing back in-core inode structures.
4342 *
4343 * The file system only accesses an inode's contents when it occupies an
4344 * "in-core" inode structure. These "in-core" structures are separate from
4345 * the page frames used to cache inode blocks. Only the latter are
4346 * transferred to/from the disk. So, when the updated contents of the
4347 * "in-core" inode structure are copied to the corresponding in-memory inode
4348 * block, the dependencies are also transferred. The following procedure is
4349 * called when copying a dirty "in-core" inode to a cached inode block.
4350 */
4351
4352/*
4353 * Called when an inode is loaded from disk. If the effective link count
4354 * differed from the actual link count when it was last flushed, then we
4355 * need to ensure that the correct effective link count is put back.
4356 */
4357/* the "in_core" copy of the inode */
4358void
4359softdep_load_inodeblock(struct inode *ip)
4360{
4361        struct inodedep *inodedep;
4362
4363        /*
4364         * Check for alternate nlink count.
4365         */
4366        ip->i_effnlink = DIP(ip, nlink);
4367        ACQUIRE_LOCK(&lk);
4368        if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
4369                FREE_LOCK(&lk);
4370                return;
4371        }
4372        ip->i_effnlink -= inodedep->id_nlinkdelta;
4373        FREE_LOCK(&lk);
4374}
4375
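The arithmetic here is small but central: while removals are pending, the on-disk link count overstates reality by id_nlinkdelta, and softdep_load_inodeblock() reconstructs the effective count when the inode is loaded. A one-line worked example, with illustrative numbers:

#include <assert.h>

/*
 * The accounting restored at load time: the inode block may still
 * claim links whose removal has not yet been committed to disk.
 */
int
main(void)
{
        int ondisk_nlink = 3;   /* what the inode block claims */
        int nlinkdelta = 2;     /* removals not yet committed */
        int effnlink = ondisk_nlink - nlinkdelta;

        assert(effnlink == 1);  /* what the running system must see */
        return 0;
}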
4376/*
4377 * This routine is called just before the "in-core" inode
4378 * information is to be copied to the in-memory inode block.
4379 * Recall that an inode block contains several inodes. If
4380 * the force flag is set, then the dependencies will be
4381 * cleared so that the update can always be made. Note that
4382 * the buffer is locked when this routine is called, so we
4383 * will never be in the middle of writing the inode block
4384 * to disk.
4385 */
4386/* the "in_core" copy of the inode */
4387/* the buffer containing the inode block */
4388/* nonzero => update must be allowed */
4389void
4390softdep_update_inodeblock(struct inode *ip, struct buf *bp, int waitfor)
4391{
4392        struct inodedep *inodedep;
4393        struct worklist *wk;
4394        int error, gotit;
4395
4396        /*
4397         * If the effective link count is not equal to the actual link
4398         * count, then we must track the difference in an inodedep while
4399         * the inode is (potentially) tossed out of the cache. Otherwise,
4400         * if there is no existing inodedep, then there are no dependencies
4401         * to track.
4402         */
4403        ACQUIRE_LOCK(&lk);
4404        if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
4405                FREE_LOCK(&lk);
4406                if (ip->i_effnlink != DIP(ip, nlink))
4407                        panic("softdep_update_inodeblock: bad link count");
4408                return;
4409        }
4410        if (inodedep->id_nlinkdelta != DIP(ip, nlink) - ip->i_effnlink) {
4411                FREE_LOCK(&lk);
4412                panic("softdep_update_inodeblock: bad delta");
4413        }
4414        /*
4415         * Changes have been initiated. Anything depending on these
4416         * changes cannot occur until this inode has been written.
4417         */
4418        inodedep->id_state &= ~COMPLETE;
4419        if ((inodedep->id_state & ONWORKLIST) == 0)
4420                WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
4421        /*
4422         * Any new dependencies associated with the incore inode must
4423         * now be moved to the list associated with the buffer holding
4424         * the in-memory copy of the inode. Once merged process any
4425         * allocdirects that are completed by the merger.
4426         */
4427        merge_inode_lists(inodedep);
4428        if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL)
4429                handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt));
4430        /*
4431         * Now that the inode has been pushed into the buffer, the
4432         * operations dependent on the inode being written to disk
4433         * can be moved to the id_bufwait so that they will be
4434         * processed when the buffer I/O completes.
4435         */
4436        while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) {
4437                WORKLIST_REMOVE(wk);
4438                WORKLIST_INSERT(&inodedep->id_bufwait, wk);
4439        }
4440        /*
4441         * Newly allocated inodes cannot be written until the bitmap
4442         * that allocates them has been written (indicated by
4443         * DEPCOMPLETE being set in id_state). If we are doing a
4444         * forced sync (e.g., an fsync on a file), we force the bitmap
4445         * to be written so that the update can be done.
4446         */
4447        do {
4448                if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) {
4449                        FREE_LOCK(&lk);
4450                        return;
4451                }
4452                bp = inodedep->id_buf;
4453                gotit = getdirtybuf(bp, MNT_WAIT);
4454        } while (gotit == -1);
4455        FREE_LOCK(&lk);
4456        if (gotit && (error = bwrite(bp)) != 0)
4457                softdep_error("softdep_update_inodeblock: bwrite", error);
4458        if ((inodedep->id_state & DEPCOMPLETE) == 0)
4459                panic("softdep_update_inodeblock: update failed");
4460}
4461
4462/*
4463 * Merge the new inode dependency list (id_newinoupdt) into the old
4464 * inode dependency list (id_inoupdt). This routine must be called
4465 * with splbio interrupts blocked.
4466 */
4467STATIC void
4468merge_inode_lists(struct inodedep *inodedep)
4469{
4470        struct allocdirect *listadp, *newadp;
4471
4472        splassert(IPL_BIO);
4473
4474        newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
4475        for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp && newadp;) {
4476                if (listadp->ad_lbn < newadp->ad_lbn) {
4477                        listadp = TAILQ_NEXT(listadp, ad_next);
4478                        continue;
4479                }
4480                TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
4481                TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
4482                if (listadp->ad_lbn == newadp->ad_lbn) {
4483                        allocdirect_merge(&inodedep->id_inoupdt, newadp,
4484                            listadp);
4485                        listadp = newadp;
4486                }
4487                newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
4488        }
4489        TAILQ_CONCAT(&inodedep->id_inoupdt, &inodedep->id_newinoupdt, ad_next);
4490}
4491
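merge_inode_lists() is a textbook merge of two lbn-sorted queues in which a key collision is resolved by allocdirect_merge() in favor of the newer entry. The same shape on plain arrays, as a runnable sketch:

#include <stdio.h>

/* Merge two sorted key sequences; a duplicate key keeps the newer entry. */
static int
merge_sorted(const int *olda, int na, const int *newa, int nb, int *out)
{
        int i = 0, j = 0, n = 0;

        while (i < na && j < nb) {
                if (olda[i] < newa[j])
                        out[n++] = olda[i++];
                else if (newa[j] < olda[i])
                        out[n++] = newa[j++];
                else {
                        out[n++] = newa[j++];   /* newer entry wins */
                        i++;
                }
        }
        while (i < na)
                out[n++] = olda[i++];
        while (j < nb)
                out[n++] = newa[j++];
        return n;
}

int
main(void)
{
        int olda[] = { 1, 3, 5 }, newa[] = { 3, 4 }, out[5];
        int n = merge_sorted(olda, 3, newa, 2, out);

        for (int i = 0; i < n; i++)
                printf("%d ", out[i]);  /* prints: 1 3 4 5 */
        printf("\n");
        return 0;
}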
4492/*
4493 * If we are doing an fsync, then we must ensure that any directory
4494 * entries for the inode have been written after the inode gets to disk.
4495 */
4496/* the "in_core" copy of the inode */
4497int
4498softdep_fsync(struct vnode *vp)
4499{
4500 struct inodedep *inodedep;
4501 struct pagedep *pagedep;
4502 struct worklist *wk;
4503 struct diradd *dap;
4504 struct mount *mnt;
4505 struct vnode *pvp;
4506 struct inode *ip;
4507 struct inode *pip;
4508 struct buf *bp;
4509 struct fs *fs;
4510 struct proc *p = CURPROC({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
; /* XXX */
4511 int error, flushparent;
4512 ufsino_t parentino;
4513 daddr_t lbn;
4514
4515 ip = VTOI(vp)((struct inode *)(vp)->v_data);
4516 fs = ip->i_fsinode_u.fs;
4517 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x6);
4518 if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) {
4519 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4520 return (0);
4521 }
4522 if (LIST_FIRST(&inodedep->id_inowait) != NULL ||
4523 LIST_FIRST(&inodedep->id_bufwait) != NULL ||
4524 TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
4525 TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) {
4526 FREE_LOCK(&lk);
4527 panic("softdep_fsync: pending ops");
4528 }
4529 for (error = 0, flushparent = 0; ; ) {
4530 if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
4531 break;
4532 if (wk->wk_type != D_DIRADD) {
4533 FREE_LOCK(&lk);
4534 panic("softdep_fsync: Unexpected type %s",
4535 TYPENAME(wk->wk_type));
4536 }
4537 dap = WK_DIRADD(wk);
4538 /*
4539 * Flush our parent if this directory entry has a MKDIR_PARENT
4540 * dependency or is contained in a newly allocated block.
4541 */
4542 if (dap->da_state & DIRCHG)
4543 pagedep = dap->da_previous->dm_pagedep;
4544 else
4545 pagedep = dap->da_pagedep;
4546 mnt = pagedep->pd_mnt;
4547 parentino = pagedep->pd_ino;
4548 lbn = pagedep->pd_lbn;
4549 if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) {
4550 FREE_LOCK(&lk);
4551 panic("softdep_fsync: dirty");
4552 }
4553 if ((dap->da_state & MKDIR_PARENT) ||
4554 (pagedep->pd_state & NEWBLOCK))
4555 flushparent = 1;
4556 else
4557 flushparent = 0;
4558 /*
4559 * If we are being fsync'ed as part of vgone'ing this vnode,
4560 * then we will not be able to release and recover the
4561 * vnode below, so we just have to give up on writing its
4562 * directory entry out. It will eventually be written, just
4563 * not now, but then the user was not asking to have it
4564 * written, so we are not breaking any promises.
4565 */
4566 mtx_enter(&vnode_mtx);
4567 if (vp->v_lflag & VXLOCK) {
4568 mtx_leave(&vnode_mtx);
4569 break;
4570 }
4571 mtx_leave(&vnode_mtx);
4572 /*
4573 * We prevent deadlock by always fetching inodes from the
4574 * root, moving down the directory tree. Thus, when fetching
4575 * our parent directory, we must unlock ourselves before
4576 * requesting the lock on our parent. See the comment in
4577 * ufs_lookup for details on possible races.
4578 */
4579 FREE_LOCK(&lk);
4580 VOP_UNLOCK(vp);
4581 error = VFS_VGET(mnt, parentino, &pvp);
4582 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4583 if (error != 0)
4584 return (error);
4585 /*
4586 * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
4587 * that are contained in direct blocks will be resolved by
4588 * doing a UFS_UPDATE. Pagedeps contained in indirect blocks
4589 * may require a complete sync'ing of the directory. So, we
4590 * try the cheap and fast UFS_UPDATE first, and if that fails,
4591 * then we do the slower VOP_FSYNC of the directory.
4592 */
4593 pip = VTOI(pvp);
4594 if (flushparent) {
4595 error = UFS_UPDATE(pip, 1);
4596 if (error) {
4597 vput(pvp);
4598 return (error);
4599 }
4600 if (pagedep->pd_state & NEWBLOCK) {
4601 error = VOP_FSYNC(pvp, p->p_ucred, MNT_WAIT, p);
4602 if (error) {
4603 vput(pvp);
4604 return (error);
4605 }
4606 }
4607 }
4608 /*
4609 * Flush directory page containing the inode's name.
4610 */
4611 error = bread(pvp, lbn, fs->fs_bsize, &bp);
4612 if (error == 0) {
4613 bp->b_bcount = blksize(fs, pip, lbn);
4614 error = bwrite(bp);
4615 } else
4616 brelse(bp);
4617 vput(pvp);
4618 if (error != 0)
4619 return (error);
4620 ACQUIRE_LOCK(&lk);
4621 if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0)
4622 break;
4623 }
4624 FREE_LOCK(&lk);
4625 return (0);
4626}
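/*
 * Editor's note: the VOP_UNLOCK/VFS_VGET/vn_lock dance above encodes
 * the filesystem-wide lock order: vnode locks are always taken from
 * the root downward.  Schematically:
 *
 *	VOP_UNLOCK(vp);				 drop the child's lock
 *	error = VFS_VGET(mnt, parentino, &pvp);	 lock the parent
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	 re-take the child's lock
 *
 * Because the parent is only acquired while the child is unlocked,
 * this path never holds locks in child-then-parent order and so cannot
 * deadlock against a lookup descending the tree.
 */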
4627
4628/*
4629 * Flush all the dirty bitmaps associated with the block device
4630 * before flushing the rest of the dirty blocks so as to reduce
4631 * the number of dependencies that will have to be rolled back.
4632 */
4633void
4634softdep_fsync_mountdev(struct vnode *vp, int waitfor)
4635{
4636 struct buf *bp, *nbp;
4637 struct worklist *wk;
4638
4639 if (!vn_isdisk(vp, NULL))
4640 panic("softdep_fsync_mountdev: vnode not a disk");
4641 ACQUIRE_LOCK(&lk);
4642 LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
4643 /*
4644 * If it is already scheduled, skip to the next buffer.
4645 */
4646 splassert(IPL_BIO);
4647 if (bp->b_flags & B_BUSY)
4648 continue;
4649
4650 if ((bp->b_flags & B_DELWRI) == 0) {
4651 FREE_LOCK(&lk);
4652 panic("softdep_fsync_mountdev: not dirty");
4653 }
4654 /*
4655 * We are only interested in bitmaps with outstanding
4656 * dependencies.
4657 */
4658 if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
4659 wk->wk_type != D_BMSAFEMAP) {
4660 continue;
4661 }
4662 bremfree(bp);
4663 buf_acquire(bp);
4664 FREE_LOCK(&lk);
4665 (void) bawrite(bp);
4666 ACQUIRE_LOCK(&lk);
4667 /*
4668 * Since we may have slept during the I/O, we need
4669 * to start from a known point.
4670 */
4671 nbp = LIST_FIRST(&vp->v_dirtyblkhd);
4672 }
4673 if (waitfor == MNT_WAIT)
4674 drain_output(vp, 1);
4675 FREE_LOCK(&lk);
4676}
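/*
 * Editor's note (an inference from the code above, not from the
 * original comments): resetting nbp to the head of v_dirtyblkhd after
 * bawrite() is what keeps the LIST_FOREACH_SAFE scan safe across a
 * sleep; the saved successor could have been written or reclaimed
 * while we slept.  The rescan terminates because each pass either
 * skips a buffer (B_BUSY, or no D_BMSAFEMAP dependency) or removes it
 * from the dirty list via bremfree()/bawrite().
 */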
4677
4678/*
4679 * This routine is called when we are trying to synchronously flush a
4680 * file. This routine must eliminate any filesystem metadata dependencies
4681 * so that the syncing routine can succeed by pushing the dirty blocks
4682 * associated with the file. If any I/O errors occur, they are returned.
4683 */
4684int
4685softdep_sync_metadata(struct vop_fsync_args *ap)
4686{
4687 struct vnode *vp = ap->a_vp;
4688 struct pagedep *pagedep;
4689 struct allocdirect *adp;
4690 struct allocindir *aip;
4691 struct buf *bp, *nbp;
4692 struct worklist *wk;
4693 int i, gotit, error, waitfor;
4694
4695 /*
4696 * Check whether this vnode is involved in a filesystem
4697 * that is doing soft dependency processing.
4698 */
4699 if (!vn_isdisk(vp, NULL)) {
4700 if (!DOINGSOFTDEP(vp))
4701 return (0);
4702 } else
4703 if (vp->v_specmountpoint == NULL ||
4704 (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP) == 0)
4705 return (0);
4706 /*
4707 * Ensure that any direct block dependencies have been cleared.
4708 */
4709 ACQUIRE_LOCK(&lk);
4710 if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) {
4711 FREE_LOCK(&lk);
4712 return (error);
4713 }
4714 /*
4715 * For most files, the only metadata dependencies are the
4716 * cylinder group maps that allocate their inode or blocks.
4717 * The block allocation dependencies can be found by traversing
4718 * the dependency lists for any buffers that remain on their
4719 * dirty buffer list. The inode allocation dependency will
4720 * be resolved when the inode is updated with MNT_WAIT.
4721 * This work is done in two passes. The first pass grabs most
4722 * of the buffers and begins asynchronously writing them. The
4723 * only way to wait for these asynchronous writes is to sleep
4724 * on the filesystem vnode which may stay busy for a long time
4725 * if the filesystem is active. So, instead, we make a second
4726 * pass over the dependencies blocking on each write. In the
4727 * usual case we will be blocking against a write that we
4728 * initiated, so when it is done the dependency will have been
4729 * resolved. Thus the second pass is expected to end quickly.
4730 */
4731 waitfor = MNT_NOWAIT;
4732top:
4733 /*
4734 * We must wait for any I/O in progress to finish so that
4735 * all potential buffers on the dirty list will be visible.
4736 */
4737 drain_output(vp, 1);
4738 bp = LIST_FIRST(&vp->v_dirtyblkhd);
4739 gotit = getdirtybuf(bp, MNT_WAIT);
4740 if (gotit == 0) {
4741 FREE_LOCK(&lk);
4742 return (0);
4743 } else if (gotit == -1)
4744 goto top;
4745loop:
4746 /*
4747 * As we hold the buffer locked, none of its dependencies
4748 * will disappear.
4749 */
4750 LIST_FOREACH(wk, &bp->b_dep, wk_list) {
4751 switch (wk->wk_type) {
4752
4753 case D_ALLOCDIRECT:
4754 adp = WK_ALLOCDIRECT(wk);
4755 if (adp->ad_state & DEPCOMPLETE)
4756 break;
4757 nbp = adp->ad_buf;
4758 gotit = getdirtybuf(nbp, waitfor);
4759 if (gotit == 0)
4760 break;
4761 else if (gotit == -1)
4762 goto loop;
4763 FREE_LOCK(&lk);
4764 if (waitfor == MNT_NOWAIT) {
4765 bawrite(nbp);
4766 } else if ((error = VOP_BWRITE(nbp)) != 0) {
4767 bawrite(bp);
4768 return (error);
4769 }
4770 ACQUIRE_LOCK(&lk);
4771 break;
4772
4773 case D_ALLOCINDIR:
4774 aip = WK_ALLOCINDIR(wk);
4775 if (aip->ai_state & DEPCOMPLETE)
4776 break;
4777 nbp = aip->ai_buf;
4778 gotit = getdirtybuf(nbp, waitfor);
4779 if (gotit == 0)
4780 break;
4781 else if (gotit == -1)
4782 goto loop;
4783 FREE_LOCK(&lk);
4784 if (waitfor == MNT_NOWAIT) {
4785 bawrite(nbp);
4786 } else if ((error = VOP_BWRITE(nbp)) != 0) {
4787 bawrite(bp);
4788 return (error);
4789 }
4790 ACQUIRE_LOCK(&lk);
4791 break;
4792
4793 case D_INDIRDEP:
4794 restart:
4795
4796 LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
4797 if (aip->ai_state & DEPCOMPLETE)
4798 continue;
4799 nbp = aip->ai_buf;
4800 if (getdirtybuf(nbp, MNT_WAIT) <= 0)
4801 goto restart;
4802 FREE_LOCK(&lk);
4803 if ((error = VOP_BWRITE(nbp)) != 0) {
4804 bawrite(bp);
4805 return (error);
4806 }
4807 ACQUIRE_LOCK(&lk);
4808 goto restart;
4809 }
4810 break;
4811
4812 case D_INODEDEP:
4813 if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs,
4814 WK_INODEDEP(wk)->id_ino)) != 0) {
4815 FREE_LOCK(&lk);
4816 bawrite(bp);
4817 return (error);
4818 }
4819 break;
4820
4821 case D_PAGEDEP:
4822 /*
4823 * We are trying to sync a directory that may
4824 * have dependencies on both its own metadata
4825 * and/or dependencies on the inodes of any
4826 * recently allocated files. We walk its diradd
4827 * lists pushing out the associated inode.
4828 */
4829 pagedep = WK_PAGEDEP(wk);
4830 for (i = 0; i < DAHASHSZ; i++) {
4831 if (LIST_FIRST(&pagedep->pd_diraddhd[i]) ==
4832 NULL)
4833 continue;
4834 if ((error =
4835 flush_pagedep_deps(vp, pagedep->pd_mnt,
4836 &pagedep->pd_diraddhd[i]))) {
4837 FREE_LOCK(&lk);
4838 bawrite(bp);
4839 return (error);
4840 }
4841 }
4842 break;
4843
4844 case D_MKDIR:
4845 /*
4846 * This case should never happen if the vnode has
4847 * been properly sync'ed. However, if this function
4848 * is used at a place where the vnode has not yet
4849 * been sync'ed, this dependency can show up. So,
4850 * rather than panic, just flush it.
4851 */
4852 nbp = WK_MKDIR(wk)->md_buf;
4853 KASSERT(bp != nbp);
4854 gotit = getdirtybuf(nbp, waitfor);
4855 if (gotit == 0)
4856 break;
4857 else if (gotit == -1)
4858 goto loop;
4859 FREE_LOCK(&lk);
4860 if (waitfor == MNT_NOWAIT) {
4861 bawrite(nbp);
4862 } else if ((error = VOP_BWRITE(nbp)) != 0) {
4863 bawrite(bp);
4864 return (error);
4865 }
4866 ACQUIRE_LOCK(&lk);
4867 break;
4868
4869 case D_BMSAFEMAP:
4870 /*
4871 * This case should never happen if the vnode has
4872 * been properly sync'ed. However, if this function
4873 * is used at a place where the vnode has not yet
4874 * been sync'ed, this dependency can show up. So,
4875 * rather than panic, just flush it.
4876 */
4877 nbp = WK_BMSAFEMAP(wk)->sm_buf;
4878 if (bp == nbp)
4879 break;
4880 gotit = getdirtybuf(nbp, waitfor);
4881 if (gotit == 0)
4882 break;
4883 else if (gotit == -1)
4884 goto loop;
4885 FREE_LOCK(&lk);
4886 if (waitfor == MNT_NOWAIT) {
4887 bawrite(nbp);
4888 } else if ((error = VOP_BWRITE(nbp)) != 0) {
4889 bawrite(bp);
4890 return (error);
4891 }
4892 ACQUIRE_LOCK(&lk);
4893 break;
4894
4895 default:
4896 FREE_LOCK(&lk);
4897 panic("softdep_sync_metadata: Unknown type %s",
4898 TYPENAME(wk->wk_type));
4899 /* NOTREACHED */
4900 }
4901 }
4902 do {
4903 nbp = LIST_NEXT(bp, b_vnbufs);
4904 gotit = getdirtybuf(nbp, MNT_WAIT);
4905 } while (gotit == -1);
4906 FREE_LOCK(&lk);
4907 bawrite(bp);
4908 ACQUIRE_LOCK(&lk);
4909 if (nbp != NULL) {
4910 bp = nbp;
4911 goto loop;
4912 }
4913 /*
4914 * The brief unlock is to allow any pent up dependency
4915 * processing to be done. Then proceed with the second pass.
4916 */
4917 if (waitfor == MNT_NOWAIT) {
4918 waitfor = MNT_WAIT;
4919 FREE_LOCK(&lk);
4920 ACQUIRE_LOCK(&lk);
4921 goto top;
4922 }
4923
4924 /*
4925 * If we have managed to get rid of all the dirty buffers,
4926 * then we are done. For certain directories and block
4927 * devices, we may need to do further work.
4928 *
4929 * We must wait for any I/O in progress to finish so that
4930 * all potential buffers on the dirty list will be visible.
4931 */
4932 drain_output(vp, 1);
4933 if (LIST_EMPTY(&vp->v_dirtyblkhd)) {
4934 FREE_LOCK(&lk);
4935 return (0);
4936 }
4937
4938 FREE_LOCK(&lk);
4939 /*
4940 * If we are trying to sync a block device, some of its buffers may
4941 * contain metadata that cannot be written until the contents of some
4942 * partially written files have been written to disk. The only easy
4943 * way to accomplish this is to sync the entire filesystem (luckily
4944 * this happens rarely).
4945 */
4946 if (vn_isdisk(vp, NULL) &&
4947 vp->v_specmountpoint && !VOP_ISLOCKED(vp) &&
4948 (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, 0, ap->a_cred,
4949 ap->a_p)) != 0)
4950 return (error);
4951 return (0);
4952}
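/*
 * Editor's note: softdep_sync_metadata() above and
 * flush_inodedep_deps() below share the same two-pass shape, which in
 * outline (pseudo-code, error handling elided) is:
 *
 *	waitfor = MNT_NOWAIT;
 * again:
 *	for each dependency buffer bp not yet DEPCOMPLETE:
 *		write bp (async under MNT_NOWAIT, sync under MNT_WAIT);
 *	if (waitfor == MNT_NOWAIT) {
 *		waitfor = MNT_WAIT;
 *		goto again;
 *	}
 *
 * Pass 2 usually blocks only on writes that pass 1 itself started, so
 * it is expected to finish quickly without sleeping on the busy
 * filesystem vnode.
 */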
4953
4954/*
4955 * Flush the dependencies associated with an inodedep.
4956 * Called with splbio blocked.
4957 */
4958STATIC int
4959flush_inodedep_deps(struct fs *fs, ufsino_t ino)
4960{
4961 struct inodedep *inodedep;
4962 struct allocdirect *adp;
4963 int gotit, error, waitfor;
4964 struct buf *bp;
4965
4966 splassert(IPL_BIO);
4967
4968 /*
4969 * This work is done in two passes. The first pass grabs most
4970 * of the buffers and begins asynchronously writing them. The
4971 * only way to wait for these asynchronous writes is to sleep
4972 * on the filesystem vnode which may stay busy for a long time
4973 * if the filesystem is active. So, instead, we make a second
4974 * pass over the dependencies blocking on each write. In the
4975 * usual case we will be blocking against a write that we
4976 * initiated, so when it is done the dependency will have been
4977 * resolved. Thus the second pass is expected to end quickly.
4978 * We give a brief window at the top of the loop to allow
4979 * any pending I/O to complete.
4980 */
4981 for (waitfor = MNT_NOWAIT; ; ) {
4982 retry_ino:
4983 FREE_LOCK(&lk);
4984 ACQUIRE_LOCK(&lk);
4985 if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
4986 return (0);
4987 TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
4988 if (adp->ad_state & DEPCOMPLETE)
4989 continue;
4990 bp = adp->ad_buf;
4991 gotit = getdirtybuf(bp, waitfor);
4992 if (gotit == 0) {
4993 if (waitfor == MNT_NOWAIT)
4994 continue;
4995 break;
4996 } else if (gotit == -1)
4997 goto retry_ino;
4998 FREE_LOCK(&lk);
4999 if (waitfor == MNT_NOWAIT) {
5000 bawrite(bp);
5001 } else if ((error = VOP_BWRITE(bp)) != 0) {
5002 ACQUIRE_LOCK(&lk);
5003 return (error);
5004 }
5005 ACQUIRE_LOCK(&lk);
5006 break;
5007 }
5008 if (adp != NULL)
5009 continue;
5010 retry_newino:
5011 TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
5012 if (adp->ad_state & DEPCOMPLETE)
5013 continue;
5014 bp = adp->ad_buf;
5015 gotit = getdirtybuf(bp, waitfor);
5016 if (gotit == 0) {
5017 if (waitfor == MNT_NOWAIT)
5018 continue;
5019 break;
5020 } else if (gotit == -1)
5021 goto retry_newino;
5022 FREE_LOCK(&lk);
5023 if (waitfor == MNT_NOWAIT) {
5024 bawrite(bp);
5025 } else if ((error = VOP_BWRITE(bp)) != 0) {
5026 ACQUIRE_LOCK(&lk);
5027 return (error);
5028 }
5029 ACQUIRE_LOCK(&lk);
5030 break;
5031 }
5032 if (adp != NULL)
5033 continue;
5034 /*
5035 * If this was pass 2, we are done; otherwise start pass 2.
5036 */
5037 if (waitfor == MNT_WAIT)
5038 break;
5039 waitfor = MNT_WAIT;
5040 }
5041 /*
5042 * Try freeing inodedep in case all dependencies have been removed.
5043 */
5044 if (inodedep_lookup(fs, ino, 0, &inodedep) != 0)
5045 (void) free_inodedep(inodedep);
5046 return (0);
5047}
5048
5049/*
5050 * Eliminate a pagedep dependency by flushing out all its diradd dependencies.
5051 * Called with splbio blocked.
5052 */
5053STATIC int
5054flush_pagedep_deps(struct vnode *pvp, struct mount *mp,
5055 struct diraddhd *diraddhdp)
5056{
5057 struct proc *p = CURPROC; /* XXX */
5058 struct worklist *wk;
5059 struct inodedep *inodedep;
5060 struct ufsmount *ump;
5061 struct diradd *dap;
5062 struct vnode *vp;
5063 int gotit, error = 0;
5064 struct buf *bp;
5065 ufsino_t inum;
5066
5067 splassert(IPL_BIO);
5068
5069 ump = VFSTOUFS(mp);
5070 while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
5071 /*
5072 * Flush ourselves if this directory entry
5073 * has a MKDIR_PARENT dependency.
5074 */
5075 if (dap->da_state & MKDIR_PARENT) {
5076 FREE_LOCK(&lk);
5077 if ((error = UFS_UPDATE(VTOI(pvp), 1)))
5078 break;
5079 ACQUIRE_LOCK(&lk);
5080 /*
5081 * If that cleared dependencies, go on to next.
5082 */
5083 if (dap != LIST_FIRST(diraddhdp))
5084 continue;
5085 if (dap->da_state & MKDIR_PARENT) {
5086 FREE_LOCK(&lk);
5087 panic("flush_pagedep_deps: MKDIR_PARENT");
5088 }
5089 }
5090 /*
5091 * A newly allocated directory must have its "." and
5092 * ".." entries written out before its name can be
5093 * committed in its parent. We do not want or need
5094 * the full semantics of a synchronous VOP_FSYNC as
5095 * that may end up here again, once for each directory
5096 * level in the filesystem. Instead, we push the blocks
5097 * and wait for them to clear. We have to fsync twice
5098 * because the first call may choose to defer blocks
5099 * that still have dependencies, but deferral will
5100 * happen at most once.
5101 */
5102 inum = dap->da_newinum;
5103 if (dap->da_state & MKDIR_BODY) {
5104 FREE_LOCK(&lk);
5105 if ((error = VFS_VGET(mp, inum, &vp)) != 0)
5106 break;
5107 if ((error=VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)) ||
5108 (error=VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
5109 vput(vp);
5110 break;
5111 }
5112 drain_output(vp, 0);
5113 /*
5114 * If first block is still dirty with a D_MKDIR
5115 * dependency then it needs to be written now.
5116 */
5117 for (;;) {
5118 error = 0;
5119 ACQUIRE_LOCK(&lk);
5120 bp = incore(vp, 0);
5121 if (bp == NULL) {
5122 FREE_LOCK(&lk);
5123 break;
5124 }
5125 LIST_FOREACH(wk, &bp->b_dep, wk_list)
5126 if (wk->wk_type == D_MKDIR)
5127 break;
5128 if (wk) {
5129 gotit = getdirtybuf(bp, MNT_WAIT);
5130 FREE_LOCK(&lk);
5131 if (gotit == -1)
5132 continue;
5133 if (gotit && (error = bwrite(bp)) != 0)
5134 break;
5135 } else
5136 FREE_LOCK(&lk);
5137 break;
5138 }
5139 vput(vp);
5140 /* Flushing of first block failed */
5141 if (error)
5142 break;
5143 ACQUIRE_LOCK(&lk);
5144 /*
5145 * If that cleared dependencies, go on to next.
5146 */
5147 if (dap != LIST_FIRST(diraddhdp))
5148 continue;
5149 if (dap->da_state & MKDIR_BODY) {
5150 FREE_LOCK(&lk);
5151 panic("flush_pagedep_deps: MKDIR_BODY");
5152 }
5153 }
5154 /*
5155 * Flush the inode on which the directory entry depends.
5156 * Having accounted for MKDIR_PARENT and MKDIR_BODY above,
5157 * the only remaining dependency is that the updated inode
5158 * count must get pushed to disk. The inode has already
5159 * been pushed into its inode buffer (via VOP_UPDATE) at
5160 * the time of the reference count change. So we need only
5161 * locate that buffer, ensure that there will be no rollback
5162 * caused by a bitmap dependency, then write the inode buffer.
5163 */
5164 if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0) {
5165 FREE_LOCK(&lk);
5166 panic("flush_pagedep_deps: lost inode");
5167 }
5168 /*
5169 * If the inode still has bitmap dependencies,
5170 * push them to disk.
5171 */
5172 retry:
5173 if ((inodedep->id_state & DEPCOMPLETE) == 0) {
5174 bp = inodedep->id_buf;
5175 gotit = getdirtybuf(bp, MNT_WAIT);
5176 if (gotit == -1)
5177 goto retry;
5178 FREE_LOCK(&lk);
5179 if (gotit && (error = bwrite(bp)) != 0)
5180 break;
5181 ACQUIRE_LOCK(&lk);
5182 if (dap != LIST_FIRST(diraddhdp))
5183 continue;
5184 }
5185 /*
5186 * If the inode is still sitting in a buffer waiting
5187 * to be written, push it to disk.
5188 */
5189 FREE_LOCK(&lk);
5190 if ((error = bread(ump->um_devvp,
5191 fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
5192 (int)ump->um_fs->fs_bsize, &bp)) != 0) {
5193 brelse(bp);
5194 break;
5195 }
5196 if ((error = bwrite(bp)) != 0)
5197 break;
5198 ACQUIRE_LOCK(&lk);
5199 /*
5200 * If we have failed to get rid of all the dependencies
5201 * then something is seriously wrong.
5202 */
5203 if (dap == LIST_FIRST(diraddhdp)) {
5204 FREE_LOCK(&lk);
5205 panic("flush_pagedep_deps: flush failed");
5206 }
5207 }
5208 if (error)
5209 ACQUIRE_LOCK(&lk);
5210 return (error);
5211}
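/*
 * Editor's note: condensing the MKDIR_BODY handling above (error
 * handling elided), the shape is:
 *
 *	VFS_VGET(mp, inum, &vp);		 fetch the new directory
 *	VOP_FSYNC(vp, cred, MNT_NOWAIT, p);	 may defer dependent blocks
 *	VOP_FSYNC(vp, cred, MNT_NOWAIT, p);	 deferral happens at most once
 *	drain_output(vp, 0);
 *	if block 0 is still dirty with a D_MKDIR dependency,
 *		bwrite() it by hand;
 *
 * i.e. two cheap pushes plus one targeted write, instead of a full
 * synchronous VOP_FSYNC that could land back in this routine once per
 * directory level.
 */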
5212
5213/*
5214 * A large burst of file addition or deletion activity can drive the
5215 * memory load excessively high. First attempt to slow things down
5216 * using the techniques below. If that fails, this routine requests
5217 * the offending operations to fall back to running synchronously
5218 * until the memory load returns to a reasonable level.
5219 */
5220int
5221softdep_slowdown(struct vnode *vp)
5222{
5223 int max_softdeps_hard;
5224
5225 max_softdeps_hard = max_softdeps * 11 / 10;
5226 if (num_dirrem < max_softdeps_hard / 2 &&
5227 num_inodedep < max_softdeps_hard)
5228 return (0);
5229 stat_sync_limit_hit += 1;
5230 return (1);
5231}
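/*
 * Editor's note: a worked instance of the thresholds above, with a
 * hypothetical max_softdeps of 10000 (the value is tunable):
 * max_softdeps_hard = 10000 * 11 / 10 = 11000, so the slowdown kicks
 * in once num_dirrem reaches 5500 or num_inodedep reaches 11000.
 */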
5232
5233/*
5234 * If memory utilization has gotten too high, deliberately slow things
5235 * down and speed up the I/O processing.
5236 */
5237STATIC int
5238request_cleanup(int resource, int islocked)
5239{
5240 struct proc *p = CURPROC;
5241 int s;
5242
5243 /*
5244 * We never hold up the filesystem syncer process.
5245 */
5246 if (p == filesys_syncer || (p->p_flag & P_SOFTDEP))
5247 return (0);
5248 /*
5249 * First check to see if the work list has gotten backlogged.
5250 * If it has, co-opt this process to help clean up two entries.
5251 * Because this process may hold inodes locked, we cannot
5252 * handle any remove requests that might block on a locked
5253 * inode as that could lead to deadlock. We set P_SOFTDEP
5254 * to avoid recursively processing the worklist.
5255 */
5256 if (num_on_worklist > max_softdeps / 10) {
5257 atomic_setbits_int(&p->p_flag, P_SOFTDEP);
5258 if (islocked)
5259 FREE_LOCK(&lk);
5260 process_worklist_item(NULL, NULL, LK_NOWAIT);
5261 process_worklist_item(NULL, NULL, LK_NOWAIT);
5262 atomic_clearbits_int(&p->p_flag, P_SOFTDEP);
5263 stat_worklist_push += 2;
5264 if (islocked)
5265 ACQUIRE_LOCK(&lk);
5266 return(1);
5267 }
5268 /*
5269 * Next, we attempt to speed up the syncer process. If that
5270 * is successful, then we allow the process to continue.
5271 */
5272 if (speedup_syncer())
5273 return(0);
5274 /*
5275 * If we are resource constrained on inode dependencies, try
5276 * flushing some dirty inodes. Otherwise, we are constrained
5277 * by file deletions, so try accelerating flushes of directories
5278 * with removal dependencies. We would like to do the cleanup
5279 * here, but we probably hold an inode locked at this point and
5280 * that might deadlock against one that we try to clean. So,
5281 * the best that we can do is request the syncer daemon to do
5282 * the cleanup for us.
5283 */
5284 switch (resource) {
5285
5286 case FLUSH_INODES:
5287 stat_ino_limit_push += 1;
5288 req_clear_inodedeps += 1;
5289 stat_countp = &stat_ino_limit_hit;
5290 break;
5291
5292 case FLUSH_REMOVE:
5293 stat_blk_limit_push += 1;
5294 req_clear_remove += 1;
5295 stat_countp = &stat_blk_limit_hit;
5296 break;
5297
5298 default:
5299 if (islocked)
5300 FREE_LOCK(&lk);
5301 panic("request_cleanup: unknown type");
5302 }
5303 /*
5304 * Hopefully the syncer daemon will catch up and awaken us.
5305 * We wait at most tickdelay before proceeding in any case.
5306 */
5307 if (islocked == 0)
5308 ACQUIRE_LOCK(&lk);
5309 proc_waiting += 1;
5310 if (!timeout_pending(&proc_waiting_timeout))
5311 timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
5312
5313 s = FREE_LOCK_INTERLOCKED(&lk);
5314 tsleep_nsec(&proc_waiting, PPAUSE, "softupdate", INFSLP);
5315 ACQUIRE_LOCK_INTERLOCKED(&lk, s);
5316 proc_waiting -= 1;
5317 if (islocked == 0)
5318 FREE_LOCK(&lk);
5319 return (1);
5320}
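/*
 * Editor's note: the sleep near the end of request_cleanup() pairs
 * with the timeout scheduled just before it.  In outline:
 *
 *	proc_waiting += 1;
 *	timeout_add(&proc_waiting_timeout, max(tickdelay, 2));
 *	tsleep_nsec(&proc_waiting, ...);	 syncer or pause_timer()
 *	proc_waiting -= 1;			 wakes us
 *
 * so even a stalled syncer delays the caller by at most roughly
 * tickdelay ticks before the timer fires and the operation proceeds.
 */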
5321
5322/*
5323 * Awaken processes pausing in request_cleanup and clear proc_waiting
5324 * to indicate that there is no longer a timer running.