Bug Summary

File: ufs/ffs/ffs_softdep.c
Warning: line 4531, column 18
Although the value stored to 'flushparent' is used in the enclosing expression, the value is never actually read from 'flushparent'
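A minimal sketch of the pattern this checker (deadcode.DeadStores) flags — this is illustrative only, not the code at line 4531; the names and the flag test are invented for the example:

        int flushparent, flags = compute_flags();       /* hypothetical helper */

        /*
         * Dead store: the assignment's value drives the branch, but
         * 'flushparent' itself is never read again afterwards.
         */
        if ((flushparent = (flags & 0x1)) != 0)
                flush();

Testing the expression directly, without the intermediate store, avoids the warning when the variable is otherwise unused.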

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ffs_softdep.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/ufs/ffs/ffs_softdep.c
/*	$OpenBSD: ffs_softdep.c,v 1.152 2023/07/05 15:13:28 beck Exp $	*/

/*
 * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
 *
 * The soft updates code is derived from the appendix of a University
 * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
 * "Soft Updates: A Solution to the Metadata Update Problem in File
 * Systems", CSE-TR-254-95, August 1995).
 *
 * Further information about soft updates can be obtained from:
 *
 *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
 *	1614 Oxford Street		mckusick@mckusick.com
 *	Berkeley, CA 94709-1608		+1-510-843-9542
 *	USA
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)ffs_softdep.c	9.59 (McKusick) 6/21/00
 *	$FreeBSD: src/sys/ufs/ffs/ffs_softdep.c,v 1.86 2001/02/04 16:08:18 phk Exp $
 */

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/specdev.h>
#include <crypto/siphash.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/softdep.h>
#include <ufs/ffs/ffs_extern.h>
#include <ufs/ufs/ufs_extern.h>

#define STATIC

/*
 * Mapping of dependency structure types to malloc types.
 */
#define D_PAGEDEP	0
#define D_INODEDEP	1
#define D_NEWBLK	2
#define D_BMSAFEMAP	3
#define D_ALLOCDIRECT	4
#define D_INDIRDEP	5
#define D_ALLOCINDIR	6
#define D_FREEFRAG	7
#define D_FREEBLKS	8
#define D_FREEFILE	9
#define D_DIRADD	10
#define D_MKDIR		11
#define D_DIRREM	12
#define D_NEWDIRBLK	13
#define D_LAST		13
/*
 * Names of softdep types.
 */
const char *softdep_typenames[] = {
	"pagedep",
	"inodedep",
	"newblk",
	"bmsafemap",
	"allocdirect",
	"indirdep",
	"allocindir",
	"freefrag",
	"freeblks",
	"freefile",
	"diradd",
	"mkdir",
	"dirrem",
	"newdirblk",
};
#define TYPENAME(type) \
	((unsigned)(type) <= D_LAST ? softdep_typenames[type] : "???")
/*
 * Finding the current process.
 */
#define CURPROC curproc
/*
 * End system adaptation definitions.
 */

/*
 * Internal function prototypes.
 */
STATIC	void softdep_error(char *, int);
STATIC	void drain_output(struct vnode *, int);
STATIC	int getdirtybuf(struct buf *, int);
STATIC	void clear_remove(struct proc *);
STATIC	void clear_inodedeps(struct proc *);
STATIC	int flush_pagedep_deps(struct vnode *, struct mount *,
	    struct diraddhd *);
STATIC	int flush_inodedep_deps(struct fs *, ufsino_t);
STATIC	int handle_written_filepage(struct pagedep *, struct buf *);
STATIC	void diradd_inode_written(struct diradd *, struct inodedep *);
STATIC	int handle_written_inodeblock(struct inodedep *, struct buf *);
STATIC	void handle_allocdirect_partdone(struct allocdirect *);
STATIC	void handle_allocindir_partdone(struct allocindir *);
STATIC	void initiate_write_filepage(struct pagedep *, struct buf *);
STATIC	void handle_written_mkdir(struct mkdir *, int);
STATIC	void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
#ifdef FFS2
STATIC	void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
#endif
STATIC	void handle_workitem_freefile(struct freefile *);
STATIC	void handle_workitem_remove(struct dirrem *);
STATIC	struct dirrem *newdirrem(struct buf *, struct inode *,
	    struct inode *, int, struct dirrem **);
STATIC	void free_diradd(struct diradd *);
STATIC	void free_allocindir(struct allocindir *, struct inodedep *);
STATIC	void free_newdirblk(struct newdirblk *);
STATIC	int indir_trunc(struct inode *, daddr_t, int, daddr_t, long *);
STATIC	void deallocate_dependencies(struct buf *, struct inodedep *);
STATIC	void free_allocdirect(struct allocdirectlst *,
	    struct allocdirect *, int);
STATIC	int check_inode_unwritten(struct inodedep *);
STATIC	int free_inodedep(struct inodedep *);
STATIC	void handle_workitem_freeblocks(struct freeblks *);
STATIC	void merge_inode_lists(struct inodedep *);
STATIC	void setup_allocindir_phase2(struct buf *, struct inode *,
	    struct allocindir *);
STATIC	struct allocindir *newallocindir(struct inode *, int, daddr_t,
	    daddr_t);
STATIC	void handle_workitem_freefrag(struct freefrag *);
STATIC	struct freefrag *newfreefrag(struct inode *, daddr_t, long);
STATIC	void allocdirect_merge(struct allocdirectlst *,
	    struct allocdirect *, struct allocdirect *);
STATIC	struct bmsafemap *bmsafemap_lookup(struct buf *);
STATIC	int newblk_lookup(struct fs *, daddr_t, int,
	    struct newblk **);
STATIC	int inodedep_lookup(struct fs *, ufsino_t, int, struct inodedep **);
STATIC	int pagedep_lookup(struct inode *, daddr_t, int, struct pagedep **);
STATIC	void pause_timer(void *);
STATIC	int request_cleanup(int, int);
STATIC	int process_worklist_item(struct mount *, int *, int);
STATIC	void add_to_worklist(struct worklist *);

/*
 * Exported softdep operations.
 */
void softdep_disk_io_initiation(struct buf *);
void softdep_disk_write_complete(struct buf *);
void softdep_deallocate_dependencies(struct buf *);
void softdep_move_dependencies(struct buf *, struct buf *);
int softdep_count_dependencies(struct buf *bp, int, int);

/*
 * Locking primitives.
 *
 * For a uniprocessor, all we need to do is protect against disk
 * interrupts. For a multiprocessor, this lock would have to be
 * a mutex. A single mutex is used throughout this file, though
 * finer grain locking could be used if contention warranted it.
 *
 * For a multiprocessor, the sleep call would accept a lock and
 * release it after the sleep processing was complete. In a uniprocessor
 * implementation there is no such interlock, so we simply mark
 * the places where it needs to be done with the `interlocked' form
 * of the lock calls. Since the uniprocessor sleep already interlocks
 * the spl, there is nothing that really needs to be done.
 */
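/*
 * Illustrative sketch only (not in the original file): how the
 * `interlocked' forms are meant to be paired around a sleep.  The spl
 * saved by FREE_LOCK_INTERLOCKED() is handed back when the lock is
 * retaken; sema_get() below uses exactly this pattern.  'example_wait'
 * and the buffer flag test are hypothetical.
 */
#if 0
STATIC void
example_wait(struct buf *bp)
{
	int s;

	ACQUIRE_LOCK(&lk);
	while (bp->b_flags & B_BUSY) {
		s = FREE_LOCK_INTERLOCKED(&lk);	/* drop lock, keep spl */
		tsleep_nsec(bp, PRIBIO, "sdwait", INFSLP);
		ACQUIRE_LOCK_INTERLOCKED(&lk, s); /* retake with saved spl */
	}
	FREE_LOCK(&lk);
}
#endif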
#ifndef /* NOT */ DEBUG
STATIC struct lockit {
	int	lkt_spl;
} lk = { 0 };
#define ACQUIRE_LOCK(lk)		(lk)->lkt_spl = splbio()
#define FREE_LOCK(lk)			splx((lk)->lkt_spl)
#define ACQUIRE_LOCK_INTERLOCKED(lk,s)	(lk)->lkt_spl = (s)
#define FREE_LOCK_INTERLOCKED(lk)	((lk)->lkt_spl)

#else /* DEBUG */
STATIC struct lockit {
	int	lkt_spl;
	pid_t	lkt_held;
	int	lkt_line;
} lk = { 0, -1 };
STATIC int lockcnt;

STATIC	void acquire_lock(struct lockit *, int);
STATIC	void free_lock(struct lockit *, int);
STATIC	void acquire_lock_interlocked(struct lockit *, int, int);
STATIC	int free_lock_interlocked(struct lockit *, int);

#define ACQUIRE_LOCK(lk)		acquire_lock(lk, __LINE__)
#define FREE_LOCK(lk)			free_lock(lk, __LINE__)
#define ACQUIRE_LOCK_INTERLOCKED(lk,s)	acquire_lock_interlocked(lk, (s), __LINE__)
#define FREE_LOCK_INTERLOCKED(lk)	free_lock_interlocked(lk, __LINE__)

STATIC void
acquire_lock(struct lockit *lk, int line)
{
	pid_t holder;
	int original_line;

	if (lk->lkt_held != -1) {
		holder = lk->lkt_held;
		original_line = lk->lkt_line;
		FREE_LOCK(lk);
		if (holder == CURPROC->p_tid)
			panic("softdep_lock: locking against myself, "
			    "acquired at line %d, relocked at line %d",
			    original_line, line);
		else
			panic("softdep_lock: lock held by %d, "
			    "acquired at line %d, relocked at line %d",
			    holder, original_line, line);
	}
	lk->lkt_spl = splbio();
	lk->lkt_held = CURPROC->p_tid;
	lk->lkt_line = line;
	lockcnt++;
}

STATIC void
free_lock(struct lockit *lk, int line)
{

	if (lk->lkt_held == -1)
		panic("softdep_unlock: lock not held at line %d", line);
	lk->lkt_held = -1;
	splx(lk->lkt_spl);
}

STATIC void
acquire_lock_interlocked(struct lockit *lk, int s, int line)
{
	pid_t holder;
	int original_line;

	if (lk->lkt_held != -1) {
		holder = lk->lkt_held;
		original_line = lk->lkt_line;
		FREE_LOCK_INTERLOCKED(lk);
		if (holder == CURPROC->p_tid)
			panic("softdep_lock: locking against myself, "
			    "acquired at line %d, relocked at line %d",
			    original_line, line);
		else
			panic("softdep_lock: lock held by %d, "
			    "acquired at line %d, relocked at line %d",
			    holder, original_line, line);
	}
	lk->lkt_held = CURPROC->p_tid;
	lk->lkt_line = line;
	lk->lkt_spl = s;
	lockcnt++;
}

STATIC int
free_lock_interlocked(struct lockit *lk, int line)
{

	if (lk->lkt_held == -1)
		panic("softdep_unlock_interlocked: lock not held at line %d",
		    line);
	lk->lkt_held = -1;

	return (lk->lkt_spl);
}
#endif /* DEBUG */

/*
 * Place holder for real semaphores.
 */
struct sema {
	int	value;
	pid_t	holder;
	char	*name;
	int	prio;
};
STATIC	void sema_init(struct sema *, char *, int);
STATIC	int sema_get(struct sema *, struct lockit *);
STATIC	void sema_release(struct sema *);

STATIC void
sema_init(struct sema *semap, char *name, int prio)
{

	semap->holder = -1;
	semap->value = 0;
	semap->name = name;
	semap->prio = prio;
}

STATIC int
sema_get(struct sema *semap, struct lockit *interlock)
{
	int s;

	if (semap->value++ > 0) {
		if (interlock != NULL)
			s = FREE_LOCK_INTERLOCKED(interlock);
		tsleep_nsec(semap, semap->prio, semap->name, INFSLP);
		if (interlock != NULL) {
			ACQUIRE_LOCK_INTERLOCKED(interlock, s);
			FREE_LOCK(interlock);
		}
		return (0);
	}
	semap->holder = CURPROC->p_tid;
	if (interlock != NULL)
		FREE_LOCK(interlock);
	return (1);
}

STATIC void
sema_release(struct sema *semap)
{

	if (semap->value <= 0 || semap->holder != CURPROC->p_tid) {
#ifdef DEBUG
		if (lk.lkt_held != -1)
			FREE_LOCK(&lk);
#endif
		panic("sema_release: not held");
	}
	if (--semap->value > 0) {
		semap->value = 0;
		wakeup(semap);
	}
	semap->holder = -1;
}

/*
 * Memory management.
 */
STATIC struct pool pagedep_pool;
STATIC struct pool inodedep_pool;
STATIC struct pool newblk_pool;
STATIC struct pool bmsafemap_pool;
STATIC struct pool allocdirect_pool;
STATIC struct pool indirdep_pool;
STATIC struct pool allocindir_pool;
STATIC struct pool freefrag_pool;
STATIC struct pool freeblks_pool;
STATIC struct pool freefile_pool;
STATIC struct pool diradd_pool;
STATIC struct pool mkdir_pool;
STATIC struct pool dirrem_pool;
STATIC struct pool newdirblk_pool;

static __inline void
softdep_free(struct worklist *item, int type)
{

	switch (type) {
	case D_PAGEDEP:
		pool_put(&pagedep_pool, item);
		break;

	case D_INODEDEP:
		pool_put(&inodedep_pool, item);
		break;

	case D_BMSAFEMAP:
		pool_put(&bmsafemap_pool, item);
		break;

	case D_ALLOCDIRECT:
		pool_put(&allocdirect_pool, item);
		break;

	case D_INDIRDEP:
		pool_put(&indirdep_pool, item);
		break;

	case D_ALLOCINDIR:
		pool_put(&allocindir_pool, item);
		break;

	case D_FREEFRAG:
		pool_put(&freefrag_pool, item);
		break;

	case D_FREEBLKS:
		pool_put(&freeblks_pool, item);
		break;

	case D_FREEFILE:
		pool_put(&freefile_pool, item);
		break;

	case D_DIRADD:
		pool_put(&diradd_pool, item);
		break;

	case D_MKDIR:
		pool_put(&mkdir_pool, item);
		break;

	case D_DIRREM:
		pool_put(&dirrem_pool, item);
		break;

	case D_NEWDIRBLK:
		pool_put(&newdirblk_pool, item);
		break;

	default:
#ifdef DEBUG
		if (lk.lkt_held != -1)
			FREE_LOCK(&lk);
#endif
		panic("softdep_free: unknown type %d", type);
	}
}

struct workhead softdep_freequeue;

static __inline void
softdep_freequeue_add(struct worklist *item)
{
	int s;

	s = splbio();
	LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list);
	splx(s);
}

static __inline void
softdep_freequeue_process(void)
{
	struct worklist *wk;

	splassert(IPL_BIO);

	while ((wk = LIST_FIRST(&softdep_freequeue)) != NULL) {
		LIST_REMOVE(wk, wk_list);
		FREE_LOCK(&lk);
		softdep_free(wk, wk->wk_type);
		ACQUIRE_LOCK(&lk);
	}
}

/*
 * Worklist queue management.
 * These routines require that the lock be held.
 */
#ifndef /* NOT */ DEBUG
#define WORKLIST_INSERT(head, item) do {	\
	(item)->wk_state |= ONWORKLIST;		\
	LIST_INSERT_HEAD(head, item, wk_list);	\
} while (0)
#define WORKLIST_REMOVE(item) do {		\
	(item)->wk_state &= ~ONWORKLIST;	\
	LIST_REMOVE(item, wk_list);		\
} while (0)
#define WORKITEM_FREE(item, type) softdep_freequeue_add((struct worklist *)item)

#else /* DEBUG */
STATIC	void worklist_insert(struct workhead *, struct worklist *);
STATIC	void worklist_remove(struct worklist *);
STATIC	void workitem_free(struct worklist *);

#define WORKLIST_INSERT(head, item) worklist_insert(head, item)
#define WORKLIST_REMOVE(item) worklist_remove(item)
#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item)

STATIC void
worklist_insert(struct workhead *head, struct worklist *item)
{

	if (lk.lkt_held == -1)
		panic("worklist_insert: lock not held");
	if (item->wk_state & ONWORKLIST) {
		FREE_LOCK(&lk);
		panic("worklist_insert: already on list");
	}
	item->wk_state |= ONWORKLIST;
	LIST_INSERT_HEAD(head, item, wk_list);
}

STATIC void
worklist_remove(struct worklist *item)
{

	if (lk.lkt_held == -1)
		panic("worklist_remove: lock not held");
	if ((item->wk_state & ONWORKLIST) == 0) {
		FREE_LOCK(&lk);
		panic("worklist_remove: not on list");
	}
	item->wk_state &= ~ONWORKLIST;
	LIST_REMOVE(item, wk_list);
}

STATIC void
workitem_free(struct worklist *item)
{

	if (item->wk_state & ONWORKLIST) {
		if (lk.lkt_held != -1)
			FREE_LOCK(&lk);
		panic("workitem_free: still on list");
	}
	softdep_freequeue_add(item);
}
#endif /* DEBUG */

/*
 * Workitem queue management
 */
STATIC struct workhead softdep_workitem_pending;
STATIC struct worklist *worklist_tail;
STATIC int num_on_worklist;	/* number of worklist items to be processed */
STATIC int softdep_worklist_busy; /* 1 => trying to do unmount */
STATIC int softdep_worklist_req; /* serialized waiters */
STATIC int max_softdeps;	/* maximum number of structs before slowdown */
STATIC int tickdelay = 2;	/* number of ticks to pause during slowdown */
STATIC int proc_waiting;	/* tracks whether we have a timeout posted */
STATIC int *stat_countp;	/* statistic to count in proc_waiting timeout */
STATIC struct timeout proc_waiting_timeout;
STATIC struct proc *filesys_syncer; /* proc of filesystem syncer process */
STATIC int req_clear_inodedeps;	/* syncer process flush some inodedeps */
#define FLUSH_INODES	1
STATIC int req_clear_remove;	/* syncer process flush some freeblks */
#define FLUSH_REMOVE	2
/*
 * runtime statistics
 */
STATIC int stat_worklist_push;	/* number of worklist cleanups */
STATIC int stat_blk_limit_push;	/* number of times block limit neared */
STATIC int stat_ino_limit_push;	/* number of times inode limit neared */
STATIC int stat_blk_limit_hit;	/* number of times block slowdown imposed */
STATIC int stat_ino_limit_hit;	/* number of times inode slowdown imposed */
STATIC int stat_sync_limit_hit;	/* number of synchronous slowdowns imposed */
STATIC int stat_indir_blk_ptrs;	/* bufs redirtied as indir ptrs not written */
STATIC int stat_inode_bitmap;	/* bufs redirtied as inode bitmap not written */
STATIC int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
STATIC int stat_dir_entry;	/* bufs redirtied as dir entry cannot write */

/*
 * Add an item to the end of the work queue.
 * This routine requires that the lock be held.
 * This is the only routine that adds items to the list.
 * The following routine is the only one that removes items
 * and does so in order from first to last.
 */
STATIC void
add_to_worklist(struct worklist *wk)
{

	if (wk->wk_state & ONWORKLIST) {
#ifdef DEBUG
		if (lk.lkt_held != -1)
			FREE_LOCK(&lk);
#endif
		panic("add_to_worklist: already on list");
	}
	wk->wk_state |= ONWORKLIST;
	if (LIST_FIRST(&softdep_workitem_pending) == NULL)
		LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list);
	else
		LIST_INSERT_AFTER(worklist_tail, wk, wk_list);
	worklist_tail = wk;
	num_on_worklist += 1;
}

/*
 * Process that runs once per second to handle items in the background queue.
 *
 * Note that we ensure that everything is done in the order in which they
 * appear in the queue. The code below depends on this property to ensure
 * that blocks of a file are freed before the inode itself is freed. This
 * ordering ensures that no new <vfsid, inum, lbn> triples will be generated
 * until all the old ones have been purged from the dependency lists.
 */
int
softdep_process_worklist(struct mount *matchmnt)
{
	struct proc *p = CURPROC;
	int matchcnt, loopcount;
	struct timeval starttime;

	/*
	 * First process any items on the delayed-free queue.
	 */
	ACQUIRE_LOCK(&lk);
	softdep_freequeue_process();
	FREE_LOCK(&lk);

	/*
	 * Record the process identifier of our caller so that we can give
	 * this process preferential treatment in request_cleanup below.
	 * We can't do this in softdep_initialize, because the syncer doesn't
	 * have to run then.
	 * NOTE! This function _could_ be called with a curproc != syncerproc.
	 */
	filesys_syncer = syncerproc;
	matchcnt = 0;

	/*
	 * There is no danger of having multiple processes run this
	 * code, but we have to single-thread it when softdep_flushfiles()
	 * is in operation to get an accurate count of the number of items
	 * related to its mount point that are in the list.
	 */
	if (matchmnt == NULL) {
		if (softdep_worklist_busy < 0)
			return(-1);
		softdep_worklist_busy += 1;
	}

	/*
	 * If requested, try removing inode or removal dependencies.
	 */
	if (req_clear_inodedeps) {
		clear_inodedeps(p);
		req_clear_inodedeps -= 1;
		wakeup_one(&proc_waiting);
	}
	if (req_clear_remove) {
		clear_remove(p);
		req_clear_remove -= 1;
		wakeup_one(&proc_waiting);
	}
	loopcount = 1;
	getmicrouptime(&starttime);
	while (num_on_worklist > 0) {
		if (process_worklist_item(matchmnt, &matchcnt, LK_NOWAIT) == 0)
			break;

		/*
		 * If a umount operation wants to run the worklist
		 * accurately, abort.
		 */
		if (softdep_worklist_req && matchmnt == NULL) {
			matchcnt = -1;
			break;
		}

		/*
		 * If requested, try removing inode or removal dependencies.
		 */
		if (req_clear_inodedeps) {
			clear_inodedeps(p);
			req_clear_inodedeps -= 1;
			wakeup_one(&proc_waiting);
		}
		if (req_clear_remove) {
			clear_remove(p);
			req_clear_remove -= 1;
			wakeup_one(&proc_waiting);
		}
		/*
		 * We do not generally want to stop for buffer space, but if
		 * we are really being a buffer hog, we will stop and wait.
		 */
#if 0
		if (loopcount++ % 128 == 0)
			bwillwrite();
#endif
		/*
		 * Never allow processing to run for more than one
		 * second. Otherwise the other syncer tasks may get
		 * excessively backlogged.
		 */
		{
			struct timeval diff;
			struct timeval tv;

			getmicrouptime(&tv);
			timersub(&tv, &starttime, &diff);
			if (diff.tv_sec != 0 && matchmnt == NULL) {
				matchcnt = -1;
				break;
			}
		}

		/*
		 * Process any new items on the delayed-free queue.
		 */
		ACQUIRE_LOCK(&lk);
		softdep_freequeue_process();
		FREE_LOCK(&lk);
	}
	if (matchmnt == NULL) {
		softdep_worklist_busy -= 1;
		if (softdep_worklist_req && softdep_worklist_busy == 0)
			wakeup(&softdep_worklist_req);
	}
	return (matchcnt);
}

/*
 * Process one item on the worklist.
 */
STATIC int
process_worklist_item(struct mount *matchmnt, int *matchcnt, int flags)
{
	struct worklist *wk, *wkend;
	struct dirrem *dirrem;
	struct mount *mp;
	struct vnode *vp;

	ACQUIRE_LOCK(&lk);
	/*
	 * Normally we just process each item on the worklist in order.
	 * However, if we are in a situation where we cannot lock any
	 * inodes, we have to skip over any dirrem requests whose
	 * vnodes are resident and locked.
	 */
	LIST_FOREACH(wk, &softdep_workitem_pending, wk_list) {
		if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
			break;
		dirrem = WK_DIRREM(wk);
		vp = ufs_ihashlookup(VFSTOUFS(dirrem->dm_mnt)->um_dev,
		    dirrem->dm_oldinum);
		if (vp == NULL || !VOP_ISLOCKED(vp))
			break;
	}
	if (wk == NULL) {
		FREE_LOCK(&lk);
		return (0);
	}
	/*
	 * Remove the item to be processed. If we are removing the last
	 * item on the list, we need to recalculate the tail pointer.
	 * As this happens rarely and usually when the list is short,
	 * we just run down the list to find it rather than tracking it
	 * in the above loop.
	 */
	WORKLIST_REMOVE(wk);
	if (wk == worklist_tail) {
		LIST_FOREACH(wkend, &softdep_workitem_pending, wk_list)
			if (LIST_NEXT(wkend, wk_list) == NULL)
				break;
		worklist_tail = wkend;
	}
	num_on_worklist -= 1;
	FREE_LOCK(&lk);
	switch (wk->wk_type) {

	case D_DIRREM:
		/* removal of a directory entry */
		mp = WK_DIRREM(wk)->dm_mnt;
#if 0
		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
			panic("%s: dirrem on suspended filesystem",
			    "process_worklist_item");
#endif
		if (matchmnt != NULL && mp == matchmnt)
			*matchcnt += 1;
		handle_workitem_remove(WK_DIRREM(wk));
		break;

	case D_FREEBLKS:
		/* releasing blocks and/or fragments from a file */
		mp = WK_FREEBLKS(wk)->fb_mnt;
#if 0
		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
			panic("%s: freeblks on suspended filesystem",
			    "process_worklist_item");
#endif
		if (matchmnt != NULL && mp == matchmnt)
			*matchcnt += 1;
		handle_workitem_freeblocks(WK_FREEBLKS(wk));
		break;

	case D_FREEFRAG:
		/* releasing a fragment when replaced as a file grows */
		mp = WK_FREEFRAG(wk)->ff_mnt;
#if 0
		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
			panic("%s: freefrag on suspended filesystem",
			    "process_worklist_item");
#endif
		if (matchmnt != NULL && mp == matchmnt)
			*matchcnt += 1;
		handle_workitem_freefrag(WK_FREEFRAG(wk));
		break;

	case D_FREEFILE:
		/* releasing an inode when its link count drops to 0 */
		mp = WK_FREEFILE(wk)->fx_mnt;
#if 0
		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
			panic("%s: freefile on suspended filesystem",
			    "process_worklist_item");
#endif
		if (matchmnt != NULL && mp == matchmnt)
			*matchcnt += 1;
		handle_workitem_freefile(WK_FREEFILE(wk));
		break;

	default:
		panic("%s_process_worklist: Unknown type %s",
		    "softdep", TYPENAME(wk->wk_type));
		/* NOTREACHED */
	}
	return (1);
}

/*
 * Move dependencies from one buffer to another.
 */
void
softdep_move_dependencies(struct buf *oldbp, struct buf *newbp)
{
	struct worklist *wk, *wktail;

	if (LIST_FIRST(&newbp->b_dep) != NULL)
		panic("softdep_move_dependencies: need merge code");
	wktail = NULL;
	ACQUIRE_LOCK(&lk);
	while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
		LIST_REMOVE(wk, wk_list);
		if (wktail == NULL)
			LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
		else
			LIST_INSERT_AFTER(wktail, wk, wk_list);
		wktail = wk;
	}
	FREE_LOCK(&lk);
}

/*
 * Purge the work list of all items associated with a particular mount point.
 */
int
softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p)
{
	struct vnode *devvp;
	int count, error = 0;

	/*
	 * Await our turn to clear out the queue, then serialize access.
	 */
	while (softdep_worklist_busy) {
		softdep_worklist_req += 1;
		tsleep_nsec(&softdep_worklist_req, PRIBIO, "softflush", INFSLP);
		softdep_worklist_req -= 1;
	}
	softdep_worklist_busy = -1;
	/*
	 * Alternately flush the block device associated with the mount
	 * point and process any dependencies that the flushing
	 * creates. We continue until no more worklist dependencies
	 * are found.
	 */
	*countp = 0;
	devvp = VFSTOUFS(oldmnt)->um_devvp;
	while ((count = softdep_process_worklist(oldmnt)) > 0) {
		*countp += count;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
		VOP_UNLOCK(devvp);
		if (error)
			break;
	}
	softdep_worklist_busy = 0;
	if (softdep_worklist_req)
		wakeup(&softdep_worklist_req);
	return (error);
}

/*
 * Flush all vnodes and worklist items associated with a specified mount point.
 */
int
softdep_flushfiles(struct mount *oldmnt, int flags, struct proc *p)
{
	int error, count, loopcnt;

	/*
	 * Alternately flush the vnodes associated with the mount
	 * point and process any dependencies that the flushing
	 * creates. In theory, this loop can happen at most twice,
	 * but we give it a few extra just to be sure.
	 */
	for (loopcnt = 10; loopcnt > 0; loopcnt--) {
		/*
		 * Do another flush in case any vnodes were brought in
		 * as part of the cleanup operations.
		 */
		if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
			break;
		if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
		    count == 0)
			break;
	}
	/*
	 * If the reboot process sleeps during the loop, the update
	 * process may call softdep_process_worklist() and create
	 * new dirty vnodes at the mount point. Call ffs_flushfiles()
	 * again after the loop has flushed all soft dependencies.
	 */
	if (error == 0)
		error = ffs_flushfiles(oldmnt, flags, p);
	/*
	 * If we are unmounting then it is an error to fail. If we
	 * are simply trying to downgrade to read-only, then filesystem
	 * activity can keep us busy forever, so we just fail with EBUSY.
	 */
	if (loopcnt == 0) {
		error = EBUSY;
	}
	return (error);
}

/*
 * Structure hashing.
 *
 * There are three types of structures that can be looked up:
 *	1) pagedep structures identified by mount point, inode number,
 *	   and logical block.
 *	2) inodedep structures identified by mount point and inode number.
 *	3) newblk structures identified by mount point and
 *	   physical block number.
 *
 * The "pagedep" and "inodedep" dependency structures are hashed
 * separately from the file blocks and inodes to which they correspond.
 * This separation helps when the in-memory copy of an inode or
 * file block must be replaced. It also obviates the need to access
 * an inode or file page when simply updating (or de-allocating)
 * dependency structures. Lookup of newblk structures is needed to
 * find newly allocated blocks when trying to associate them with
 * their allocdirect or allocindir structure.
 *
 * The lookup routines optionally create and hash a new instance when
 * an existing entry is not found.
 */
#define DEPALLOC	0x0001	/* allocate structure if lookup fails */
#define NODELAY		0x0002	/* cannot do background work */
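/*
 * Illustrative usage sketch (not part of the original file): the lookup
 * routines below share one convention.  With DEPALLOC set, a missing
 * entry is allocated, so the caller may rely on the returned pointer;
 * the return value only reports whether the entry already existed.
 */
#if 0
	struct inodedep *inodedep;

	ACQUIRE_LOCK(&lk);		/* splbio interlock required */
	if (inodedep_lookup(fs, inum, DEPALLOC, &inodedep) == 0) {
		/* freshly allocated entry: initialize its dependencies */
	}
	FREE_LOCK(&lk);
#endif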

SIPHASH_KEY softdep_hashkey;

/*
 * Structures and routines associated with pagedep caching.
 */
LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
u_long	pagedep_hash;		/* size of hash table - 1 */
STATIC struct sema pagedep_in_progress;

/*
 * Look up a pagedep. Return 1 if found, 0 if not found or found
 * when asked to allocate but not associated with any buffer.
 * If not found, allocate if DEPALLOC flag is passed.
 * Found or allocated entry is returned in pagedeppp.
 * This routine must be called with splbio interrupts blocked.
 */
STATIC int
pagedep_lookup(struct inode *ip, daddr_t lbn, int flags,
    struct pagedep **pagedeppp)
{
	SIPHASH_CTX ctx;
	struct pagedep *pagedep;
	struct pagedep_hashhead *pagedephd;
	struct mount *mp;
	int i;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("pagedep_lookup: lock not held");
#endif
	mp = ITOV(ip)->v_mount;

	SipHash24_Init(&ctx, &softdep_hashkey);
	SipHash24_Update(&ctx, &mp, sizeof(mp));
	SipHash24_Update(&ctx, &ip->i_number, sizeof(ip->i_number));
	SipHash24_Update(&ctx, &lbn, sizeof(lbn));
	pagedephd = &pagedep_hashtbl[SipHash24_End(&ctx) & pagedep_hash];
top:
	LIST_FOREACH(pagedep, pagedephd, pd_hash)
		if (ip->i_number == pagedep->pd_ino &&
		    lbn == pagedep->pd_lbn &&
		    mp == pagedep->pd_mnt)
			break;
	if (pagedep) {
		*pagedeppp = pagedep;
		if ((flags & DEPALLOC) != 0 &&
		    (pagedep->pd_state & ONWORKLIST) == 0)
			return (0);
		return (1);
	}
	if ((flags & DEPALLOC) == 0) {
		*pagedeppp = NULL;
		return (0);
	}
	if (sema_get(&pagedep_in_progress, &lk) == 0) {
		ACQUIRE_LOCK(&lk);
		goto top;
	}
	pagedep = pool_get(&pagedep_pool, PR_WAITOK | PR_ZERO);
	pagedep->pd_list.wk_type = D_PAGEDEP;
	pagedep->pd_mnt = mp;
	pagedep->pd_ino = ip->i_number;
	pagedep->pd_lbn = lbn;
	LIST_INIT(&pagedep->pd_dirremhd);
	LIST_INIT(&pagedep->pd_pendinghd);
	for (i = 0; i < DAHASHSZ; i++)
		LIST_INIT(&pagedep->pd_diraddhd[i]);
	ACQUIRE_LOCK(&lk);
	LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
	sema_release(&pagedep_in_progress);
	*pagedeppp = pagedep;
	return (0);
}

/*
 * Structures and routines associated with inodedep caching.
 */
LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
STATIC u_long	inodedep_hash;	/* size of hash table - 1 */
STATIC long	num_inodedep;	/* number of inodedep allocated */
STATIC struct sema inodedep_in_progress;

/*
 * Look up an inodedep. Return 1 if found, 0 if not found.
 * If not found, allocate if DEPALLOC flag is passed.
 * Found or allocated entry is returned in inodedeppp.
 * This routine must be called with splbio interrupts blocked.
 */
STATIC int
inodedep_lookup(struct fs *fs, ufsino_t inum, int flags,
    struct inodedep **inodedeppp)
{
	SIPHASH_CTX ctx;
	struct inodedep *inodedep;
	struct inodedep_hashhead *inodedephd;
	int firsttry;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("inodedep_lookup: lock not held");
#endif
	firsttry = 1;
	SipHash24_Init(&ctx, &softdep_hashkey);
	SipHash24_Update(&ctx, &fs, sizeof(fs));
	SipHash24_Update(&ctx, &inum, sizeof(inum));
	inodedephd = &inodedep_hashtbl[SipHash24_End(&ctx) & inodedep_hash];
top:
	LIST_FOREACH(inodedep, inodedephd, id_hash)
		if (inum == inodedep->id_ino && fs == inodedep->id_fs)
			break;
	if (inodedep) {
		*inodedeppp = inodedep;
		return (1);
	}
	if ((flags & DEPALLOC) == 0) {
		*inodedeppp = NULL;
		return (0);
	}
	/*
	 * If we are over our limit, try to improve the situation.
	 */
	if (num_inodedep > max_softdeps && firsttry && (flags & NODELAY) == 0 &&
	    request_cleanup(FLUSH_INODES, 1)) {
		firsttry = 0;
		goto top;
	}
	if (sema_get(&inodedep_in_progress, &lk) == 0) {
		ACQUIRE_LOCK(&lk);
		goto top;
	}
	num_inodedep += 1;
	inodedep = pool_get(&inodedep_pool, PR_WAITOK);
	inodedep->id_list.wk_type = D_INODEDEP;
	inodedep->id_fs = fs;
	inodedep->id_ino = inum;
	inodedep->id_state = ALLCOMPLETE;
	inodedep->id_nlinkdelta = 0;
	inodedep->id_savedino1 = NULL;
	inodedep->id_savedsize = -1;
	inodedep->id_buf = NULL;
	LIST_INIT(&inodedep->id_pendinghd);
	LIST_INIT(&inodedep->id_inowait);
	LIST_INIT(&inodedep->id_bufwait);
	TAILQ_INIT(&inodedep->id_inoupdt);
	TAILQ_INIT(&inodedep->id_newinoupdt);
	ACQUIRE_LOCK(&lk);
	LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
	sema_release(&inodedep_in_progress);
	*inodedeppp = inodedep;
	return (0);
}

/*
 * Structures and routines associated with newblk caching.
 */
LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
u_long	newblk_hash;		/* size of hash table - 1 */
STATIC struct sema newblk_in_progress;

/*
 * Look up a newblk. Return 1 if found, 0 if not found.
 * If not found, allocate if DEPALLOC flag is passed.
 * Found or allocated entry is returned in newblkpp.
 */
STATIC int
newblk_lookup(struct fs *fs, daddr_t newblkno, int flags,
    struct newblk **newblkpp)
{
	SIPHASH_CTX ctx;
	struct newblk *newblk;
	struct newblk_hashhead *newblkhd;

	SipHash24_Init(&ctx, &softdep_hashkey);
	SipHash24_Update(&ctx, &fs, sizeof(fs));
	SipHash24_Update(&ctx, &newblkno, sizeof(newblkno));
	newblkhd = &newblk_hashtbl[SipHash24_End(&ctx) & newblk_hash];
top:
	LIST_FOREACH(newblk, newblkhd, nb_hash)
		if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs)
			break;
	if (newblk) {
		*newblkpp = newblk;
		return (1);
	}
	if ((flags & DEPALLOC) == 0) {
		*newblkpp = NULL;
		return (0);
	}
	if (sema_get(&newblk_in_progress, NULL) == 0)
		goto top;
	newblk = pool_get(&newblk_pool, PR_WAITOK);
	newblk->nb_state = 0;
	newblk->nb_fs = fs;
	newblk->nb_newblkno = newblkno;
	LIST_INSERT_HEAD(newblkhd, newblk, nb_hash);
	sema_release(&newblk_in_progress);
	*newblkpp = newblk;
	return (0);
}

/*
 * Executed during filesystem system initialization before
 * mounting any file systems.
 */
void
softdep_initialize(void)
{

	bioops.io_start = softdep_disk_io_initiation;
	bioops.io_complete = softdep_disk_write_complete;
	bioops.io_deallocate = softdep_deallocate_dependencies;
	bioops.io_movedeps = softdep_move_dependencies;
	bioops.io_countdeps = softdep_count_dependencies;

	LIST_INIT(&mkdirlisthd);
	LIST_INIT(&softdep_workitem_pending);
#ifdef KMEMSTATS
	max_softdeps = min(initialvnodes * 8,
	    kmemstats[M_INODEDEP].ks_limit / (2 * sizeof(struct inodedep)));
#else
	max_softdeps = initialvnodes * 4;
#endif
	arc4random_buf(&softdep_hashkey, sizeof(softdep_hashkey));
	pagedep_hashtbl = hashinit(initialvnodes / 5, M_PAGEDEP, M_WAITOK,
	    &pagedep_hash);
	sema_init(&pagedep_in_progress, "pagedep", PRIBIO);
	inodedep_hashtbl = hashinit(initialvnodes, M_INODEDEP, M_WAITOK,
	    &inodedep_hash);
	sema_init(&inodedep_in_progress, "inodedep", PRIBIO);
	newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
	sema_init(&newblk_in_progress, "newblk", PRIBIO);
	timeout_set(&proc_waiting_timeout, pause_timer, NULL);
	pool_init(&pagedep_pool, sizeof(struct pagedep), 0, IPL_NONE,
	    PR_WAITOK, "pagedep", NULL);
	pool_init(&inodedep_pool, sizeof(struct inodedep), 0, IPL_NONE,
	    PR_WAITOK, "inodedep", NULL);
	pool_init(&newblk_pool, sizeof(struct newblk), 0, IPL_NONE,
	    PR_WAITOK, "newblk", NULL);
	pool_init(&bmsafemap_pool, sizeof(struct bmsafemap), 0, IPL_NONE,
	    PR_WAITOK, "bmsafemap", NULL);
	pool_init(&allocdirect_pool, sizeof(struct allocdirect), 0, IPL_NONE,
	    PR_WAITOK, "allocdir", NULL);
	pool_init(&indirdep_pool, sizeof(struct indirdep), 0, IPL_NONE,
	    PR_WAITOK, "indirdep", NULL);
	pool_init(&allocindir_pool, sizeof(struct allocindir), 0, IPL_NONE,
	    PR_WAITOK, "allocindir", NULL);
	pool_init(&freefrag_pool, sizeof(struct freefrag), 0, IPL_NONE,
	    PR_WAITOK, "freefrag", NULL);
	pool_init(&freeblks_pool, sizeof(struct freeblks), 0, IPL_NONE,
	    PR_WAITOK, "freeblks", NULL);
	pool_init(&freefile_pool, sizeof(struct freefile), 0, IPL_NONE,
	    PR_WAITOK, "freefile", NULL);
	pool_init(&diradd_pool, sizeof(struct diradd), 0, IPL_NONE,
	    PR_WAITOK, "diradd", NULL);
	pool_init(&mkdir_pool, sizeof(struct mkdir), 0, IPL_NONE,
	    PR_WAITOK, "mkdir", NULL);
	pool_init(&dirrem_pool, sizeof(struct dirrem), 0, IPL_NONE,
	    PR_WAITOK, "dirrem", NULL);
	pool_init(&newdirblk_pool, sizeof(struct newdirblk), 0, IPL_NONE,
	    PR_WAITOK, "newdirblk", NULL);
}

/*
 * Called at mount time to notify the dependency code that a
 * filesystem wishes to use it.
 */
int
softdep_mount(struct vnode *devvp, struct mount *mp, struct fs *fs,
    struct ucred *cred)
{
	struct csum_total cstotal;
	struct cg *cgp;
	struct buf *bp;
	int error, cyl;

	KASSERTMSG(1, "softdep_mount should not have been called");

	/*
	 * When doing soft updates, the counters in the
	 * superblock may have gotten out of sync, so we have
	 * to scan the cylinder groups and recalculate them.
	 */
	if ((fs->fs_flags & FS_UNCLEAN) == 0)
		return (0);
	memset(&cstotal, 0, sizeof(cstotal));
	for (cyl = 0; cyl < fs->fs_ncg; cyl++) {
		if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
		    fs->fs_cgsize, &bp)) != 0) {
			brelse(bp);
			return (error);
		}
		cgp = (struct cg *)bp->b_data;
		cstotal.cs_nffree += cgp->cg_cs.cs_nffree;
		cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree;
		cstotal.cs_nifree += cgp->cg_cs.cs_nifree;
		cstotal.cs_ndir += cgp->cg_cs.cs_ndir;
		fs->fs_cs(fs, cyl) = cgp->cg_cs;
		brelse(bp);
	}
#ifdef DEBUG
	if (memcmp(&cstotal, &fs->fs_cstotal, sizeof(cstotal)))
		printf("ffs_mountfs: superblock updated for soft updates\n");
#endif
	memcpy(&fs->fs_cstotal, &cstotal, sizeof(cstotal));
	return (0);
}

1259/*
1260 * Protecting the freemaps (or bitmaps).
1261 *
1262 * To eliminate the need to execute fsck before mounting a file system
1263 * after a power failure, one must (conservatively) guarantee that the
1264 * on-disk copy of the bitmaps never indicate that a live inode or block is
1265 * free. So, when a block or inode is allocated, the bitmap should be
1266 * updated (on disk) before any new pointers. When a block or inode is
1267 * freed, the bitmap should not be updated until all pointers have been
1268 * reset. The latter dependency is handled by the delayed de-allocation
1269 * approach described below for block and inode de-allocation. The former
1270 * dependency is handled by calling the following procedure when a block or
1271 * inode is allocated. When an inode is allocated an "inodedep" is created
1272 * with its DEPCOMPLETE flag cleared until its bitmap is written to disk.
1273 * Each "inodedep" is also inserted into the hash indexing structure so
1274 * that any additional link additions can be made dependent on the inode
1275 * allocation.
1276 *
1277 * The ufs file system maintains a number of free block counts (e.g., per
1278 * cylinder group, per cylinder and per <cylinder, rotational position> pair)
1279 * in addition to the bitmaps. These counts are used to improve efficiency
1280 * during allocation and therefore must be consistent with the bitmaps.
1281 * There is no convenient way to guarantee post-crash consistency of these
1282 * counts with simple update ordering, for two main reasons: (1) The counts
1283 * and bitmaps for a single cylinder group block are not in the same disk
1284 * sector. If a disk write is interrupted (e.g., by power failure), one may
1285 * be written and the other not. (2) Some of the counts are located in the
1286 * superblock rather than the cylinder group block. So, we focus our soft
1287 * updates implementation on protecting the bitmaps. When mounting a
1288 * filesystem, we recompute the auxiliary counts from the bitmaps.
1289 */
1290
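The mount-time recount performed by softdep_mount() above can be sketched in
isolation. The following is a minimal userspace illustration, not kernel code:
because the ordering rules protect the bitmaps the per-cylinder-group summaries
are derived from, the aggregate totals can simply be re-summed from them.
"struct cg_sum" and its fields are stand-ins for the kernel's fs/cg structures.

#include <stddef.h>

struct cg_sum { long nffree, nbfree, nifree, ndir; };

/* Rebuild the aggregate totals from the trusted per-group summaries. */
static struct cg_sum
recount(const struct cg_sum *cgs, size_t ncg)
{
	struct cg_sum total = { 0, 0, 0, 0 };
	size_t i;

	for (i = 0; i < ncg; i++) {
		total.nffree += cgs[i].nffree;	/* free fragments */
		total.nbfree += cgs[i].nbfree;	/* free full blocks */
		total.nifree += cgs[i].nifree;	/* free inodes */
		total.ndir += cgs[i].ndir;	/* allocated directories */
	}
	return (total);
}
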
1291/*
1292 * Called just after updating the cylinder group block to allocate an inode.
1293 */
1294/* buffer for cylgroup block with inode map */
1295/* inode related to allocation */
1296/* new inode number being allocated */
1297void
1298softdep_setup_inomapdep(struct buf *bp, struct inode *ip, ufsino_t newinum)
1299{
1300 struct inodedep *inodedep;
1301 struct bmsafemap *bmsafemap;
1302
1303 /*
1304 * Create a dependency for the newly allocated inode.
1305 * Panic if it already exists, as something is seriously wrong.
1306 * Otherwise add it to the dependency list for the buffer holding
1307 * the cylinder group map from which it was allocated.
1308 */
1309 ACQUIRE_LOCK(&lk);
1310 if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC | NODELAY, &inodedep)
1311 != 0) {
1312 FREE_LOCK(&lk);
1313 panic("softdep_setup_inomapdep: found inode");
1314 }
1315 inodedep->id_buf = bp;
1316 inodedep->id_state &= ~DEPCOMPLETE;
1317 bmsafemap = bmsafemap_lookup(bp);
1318 LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
1319 FREE_LOCK(&lk);
1320}
1321
1322/*
1323 * Called just after updating the cylinder group block to
1324 * allocate block or fragment.
1325 */
1326/* buffer for cylgroup block with block map */
1327/* filesystem doing allocation */
1328/* number of newly allocated block */
1329void
1330softdep_setup_blkmapdep(struct buf *bp, struct fs *fs, daddr_t newblkno)
1331{
1332 struct newblk *newblk;
1333 struct bmsafemap *bmsafemap;
1334
1335 /*
1336 * Create a dependency for the newly allocated block.
1337 * Add it to the dependency list for the buffer holding
1338 * the cylinder group map from which it was allocated.
1339 */
1340 if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0)
1341 panic("softdep_setup_blkmapdep: found block");
1342 ACQUIRE_LOCK(&lk);
1343 newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp);
1344 LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
1345 FREE_LOCK(&lk);
1346}
1347
1348/*
1349 * Find the bmsafemap associated with a cylinder group buffer.
1350 * If none exists, create one. The buffer must be locked when
1351 * this routine is called and this routine must be called with
1352 * splbio interrupts blocked.
1353 */
1354STATIC struct bmsafemap *
1355bmsafemap_lookup(struct buf *bp)
1356{
1357 struct bmsafemap *bmsafemap;
1358 struct worklist *wk;
1359
1360 splassert(IPL_BIO);
1361
1362#ifdef DEBUG
1363 if (lk.lkt_held == -1)
1364 panic("bmsafemap_lookup: lock not held");
1365#endif
1366 LIST_FOREACH(wk, &bp->b_dep, wk_list)
1367 if (wk->wk_type == D_BMSAFEMAP)
1368 return (WK_BMSAFEMAP(wk));
1369 FREE_LOCK(&lk);
1370 bmsafemap = pool_get(&bmsafemap_pool, PR_WAITOK);
1371 bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
1372 bmsafemap->sm_list.wk_state = 0;
1373 bmsafemap->sm_buf = bp;
1374 LIST_INIT(&bmsafemap->sm_allocdirecthd);
1375 LIST_INIT(&bmsafemap->sm_allocindirhd);
1376 LIST_INIT(&bmsafemap->sm_inodedephd);
1377 LIST_INIT(&bmsafemap->sm_newblkhd);
1378 ACQUIRE_LOCK(&lk);
1379 WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
1380 return (bmsafemap);
1381}
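
bmsafemap_lookup() above is a find-or-create on an intrusive list: scan under
the lock, and if nothing is found, drop the lock around the sleeping pool
allocation, then retake it to link the new entry. A minimal userspace sketch
of the same shape follows, with a pthread mutex standing in for the splbio
interrupt lock and malloc(3) for pool_get(); all names here are hypothetical.
Note that, as in the kernel routine, the list is not re-scanned after the lock
is retaken -- in the kernel, the caller's locked buffer is what makes that safe.

#include <sys/queue.h>
#include <pthread.h>
#include <stdlib.h>

struct dep {
	int key;
	LIST_ENTRY(dep) entries;
};
LIST_HEAD(dephead, dep);

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;

/* Find the entry with the given key, creating it if absent.
 * Called with lk held; returns with lk held. */
static struct dep *
dep_lookup(struct dephead *head, int key)
{
	struct dep *d;

	LIST_FOREACH(d, head, entries)
		if (d->key == key)
			return (d);
	pthread_mutex_unlock(&lk);	/* drop lock across allocation */
	if ((d = malloc(sizeof(*d))) == NULL)
		abort();		/* sketch: no error path */
	d->key = key;
	pthread_mutex_lock(&lk);	/* retake before linking */
	LIST_INSERT_HEAD(head, d, entries);
	return (d);
}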
1382
1383/*
1384 * Direct block allocation dependencies.
1385 *
1386 * When a new block is allocated, the corresponding disk locations must be
1387 * initialized (with zeros or new data) before the on-disk inode points to
1388 * them. Also, the freemap from which the block was allocated must be
1389 * updated (on disk) before the inode's pointer. These two dependencies are
1390 * independent of each other and are needed for all file blocks and indirect
1391 * blocks that are pointed to directly by the inode. Just before the
1392 * "in-core" version of the inode is updated with a newly allocated block
1393 * number, a procedure (below) is called to set up allocation dependency
1394 * structures. These structures are removed when the corresponding
1395 * dependencies are satisfied or when the block allocation becomes obsolete
1396 * (i.e., the file is deleted, the block is de-allocated, or the block is a
1397 * fragment that gets upgraded). All of these cases are handled in
1398 * procedures described later.
1399 *
1400 * When a file extension causes a fragment to be upgraded, either to a larger
1401 * fragment or to a full block, the on-disk location may change (if the
1402 * previous fragment could not simply be extended). In this case, the old
1403 * fragment must be de-allocated, but not until after the inode's pointer has
1404 * been updated. In most cases, this is handled by later procedures, which
1405 * will construct a "freefrag" structure to be added to the workitem queue
1406 * when the inode update is complete (or obsolete). The main exception to
1407 * this is when an allocation occurs while a pending allocation dependency
1408 * (for the same block pointer) remains. This case is handled in the main
1409 * allocation dependency setup procedure by immediately freeing the
1410 * unreferenced fragments.
1411 */
1412/* inode to which block is being added */
1413/* block pointer within inode */
1414/* disk block number being added */
1415/* previous block number, 0 unless frag */
1416/* size of new block */
1417 /* size of old block */
1418/* bp for allocated block */
1419void
1420softdep_setup_allocdirect(struct inode *ip, daddr_t lbn, daddr_t newblkno,
1421 daddr_t oldblkno, long newsize, long oldsize, struct buf *bp)
1422{
1423 struct allocdirect *adp, *oldadp;
1424 struct allocdirectlst *adphead;
1425 struct bmsafemap *bmsafemap;
1426 struct inodedep *inodedep;
1427 struct pagedep *pagedep;
1428 struct newblk *newblk;
1429
1430 adp = pool_get(&allocdirect_pool, PR_WAITOK | PR_ZERO);
1431 adp->ad_list.wk_type = D_ALLOCDIRECT;
1432 adp->ad_lbn = lbn;
1433 adp->ad_newblkno = newblkno;
1434 adp->ad_oldblkno = oldblkno;
1435 adp->ad_newsize = newsize;
1436 adp->ad_oldsize = oldsize;
1437 adp->ad_state = ATTACHED;
1438 LIST_INIT(&adp->ad_newdirblk);
1439 if (newblkno == oldblkno)
1440 adp->ad_freefrag = NULL;
1441 else
1442 adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
1443
1444 if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
1445 panic("softdep_setup_allocdirect: lost block");
1446
1447 ACQUIRE_LOCK(&lk);
1448 inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
1449 adp->ad_inodedep = inodedep;
1450
1451 if (newblk->nb_state == DEPCOMPLETE) {
1452 adp->ad_state |= DEPCOMPLETE;
1453 adp->ad_buf = NULL;
1454 } else {
1455 bmsafemap = newblk->nb_bmsafemap;
1456 adp->ad_buf = bmsafemap->sm_buf;
1457 LIST_REMOVE(newblk, nb_deps);
1458 LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
1459 }
1460 LIST_REMOVE(newblk, nb_hash);
1461 pool_put(&newblk_pool, newblk);
1462
1463 if (bp == NULL) {
1464 /*
1465 * XXXUBC - Yes, I know how to fix this, but not right now.
1466 */
1467 panic("softdep_setup_allocdirect: Bonk art in the head");
1468 }
1469 WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
1470 if (lbn >= NDADDR) {
1471 /* allocating an indirect block */
1472 if (oldblkno != 0) {
1473 FREE_LOCK(&lk);
1474 panic("softdep_setup_allocdirect: non-zero indir");
1475 }
1476 } else {
1477 /*
1478 * Allocating a direct block.
1479 *
1480 * If we are allocating a directory block, then we must
1481 * allocate an associated pagedep to track additions and
1482 * deletions.
1483 */
1484 if ((DIP(ip, mode) & IFMT) == IFDIR &&
1485 pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
1486 WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
1487 }
1488 /*
1489 * The list of allocdirects must be kept in sorted and ascending
1490 * order so that the rollback routines can quickly determine the
1491 * first uncommitted block (the size of the file stored on disk
1492 * ends at the end of the lowest committed fragment, or if there
1493 * are no fragments, at the end of the highest committed block).
1494 * Since files generally grow, the typical case is that the new
1495 * block is to be added at the end of the list. We speed this
1496 * special case by checking against the last allocdirect in the
1497 * list before laboriously traversing the list looking for the
1498 * insertion point.
1499 */
1500 adphead = &inodedep->id_newinoupdt;
1501 oldadp = TAILQ_LAST(adphead, allocdirectlst);
1502 if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
1503 /* insert at end of list */
1504 TAILQ_INSERT_TAIL(adphead, adp, ad_next);
1505 if (oldadp != NULL && oldadp->ad_lbn == lbn)
1506 allocdirect_merge(adphead, adp, oldadp);
1507 FREE_LOCK(&lk);
1508 return;
1509 }
1510 TAILQ_FOREACH(oldadp, adphead, ad_next) {
1511 if (oldadp->ad_lbn >= lbn)
1512 break;
1513 }
1514 if (oldadp == NULL) {
1515 FREE_LOCK(&lk);
1516 panic("softdep_setup_allocdirect: lost entry");
1517 }
1518 /* insert in middle of list */
1519 TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
1520 if (oldadp->ad_lbn == lbn)
1521 allocdirect_merge(adphead, adp, oldadp);
1522 FREE_LOCK(&lk);
1523}
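
The sorted insert described in the comment above (check the tail first, walk
the list only on the slow path) is a generally useful pattern. Here is a
hedged userspace sketch using <sys/queue.h>, where "struct node" and "key"
are stand-ins for struct allocdirect and ad_lbn:

#include <sys/queue.h>

struct node {
	long key;			/* stands in for ad_lbn */
	TAILQ_ENTRY(node) entries;
};
TAILQ_HEAD(nodelist, node);

static void
sorted_insert(struct nodelist *head, struct node *np)
{
	struct node *it = TAILQ_LAST(head, nodelist);

	/* Fast path: files usually grow, so the tail usually wins. */
	if (it == NULL || it->key <= np->key) {
		TAILQ_INSERT_TAIL(head, np, entries);
		return;
	}
	/* Slow path: find the first entry whose key is >= ours. */
	TAILQ_FOREACH(it, head, entries)
		if (it->key >= np->key)
			break;
	TAILQ_INSERT_BEFORE(it, np, entries);
}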
1524
1525/*
1526 * Replace an old allocdirect dependency with a newer one.
1527 * This routine must be called with splbio interrupts blocked.
1528 */
1529/* head of list holding allocdirects */
1530/* allocdirect being added */
1531/* existing allocdirect being checked */
1532STATIC void
1533allocdirect_merge(struct allocdirectlst *adphead, struct allocdirect *newadp,
1534 struct allocdirect *oldadp)
1535{
1536 struct worklist *wk;
1537 struct freefrag *freefrag;
1538 struct newdirblk *newdirblk;
1539
1540 splassert(IPL_BIO);
1541
1542#ifdef DEBUG
1543 if (lk.lkt_held == -1)
1544 panic("allocdirect_merge: lock not held");
1545#endif
1546 if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
1547 newadp->ad_oldsize != oldadp->ad_newsize ||
1548 newadp->ad_lbn >= NDADDR) {
1549 FREE_LOCK(&lk);
1550 panic("allocdirect_merge: old %lld != new %lld || lbn %lld >= "
1551 "%d", (long long)newadp->ad_oldblkno,
1552 (long long)oldadp->ad_newblkno, (long long)newadp->ad_lbn,
1553 NDADDR);
1554 }
1555 newadp->ad_oldblkno = oldadp->ad_oldblkno;
1556 newadp->ad_oldsize = oldadp->ad_oldsize;
1557 /*
1558 * If the old dependency had a fragment to free or had never
1559 * previously had a block allocated, then the new dependency
1560 * can immediately post its freefrag and adopt the old freefrag.
1561 * This action is done by swapping the freefrag dependencies.
1562 * The new dependency gains the old one's freefrag, and the
1563 * old one gets the new one and then immediately puts it on
1564 * the worklist when it is freed by free_allocdirect. It is
1565 * not possible to do this swap when the old dependency had a
1566 * non-zero size but no previous fragment to free. This condition
1567 * arises when the new block is an extension of the old block.
1568 * Here, the first part of the fragment allocated to the new
1569 * dependency is part of the block currently claimed on disk by
1570 * the old dependency, so cannot legitimately be freed until the
1571 * conditions for the new dependency are fulfilled.
1572 */
1573 if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
1574 freefrag = newadp->ad_freefrag;
1575 newadp->ad_freefrag = oldadp->ad_freefrag;
1576 oldadp->ad_freefrag = freefrag;
1577 }
1578 /*
1579 * If we are tracking a new directory-block allocation,
1580 * move it from the old allocdirect to the new allocdirect.
1581 */
1582 if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
1583 newdirblk = WK_NEWDIRBLK(wk);
1584 WORKLIST_REMOVE(&newdirblk->db_list);
1585 if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL)
1586 panic("allocdirect_merge: extra newdirblk");
1587 WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list);
1588 }
1589 free_allocdirect(adphead, oldadp, 0);
1590}
1591
1592/*
1593 * Allocate a new freefrag structure if needed.
1594 */
1595STATIC struct freefrag *
1596newfreefrag(struct inode *ip, daddr_t blkno, long size)
1597{
1598 struct freefrag *freefrag;
1599 struct fs *fs;
1600
1601 if (blkno == 0)
1602 return (NULL);
1603 fs = ip->i_fs;
1604 if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
1605 panic("newfreefrag: frag size");
1606 freefrag = pool_get(&freefrag_pool, PR_WAITOK);
1607 freefrag->ff_list.wk_type = D_FREEFRAG;
1608 freefrag->ff_state = DIP(ip, uid) & ~ONWORKLIST; /* used below */
1609 freefrag->ff_inum = ip->i_number;
1610 freefrag->ff_mnt = ITOV(ip)->v_mount;
1611 freefrag->ff_devvp = ip->i_devvp;
1612 freefrag->ff_blkno = blkno;
1613 freefrag->ff_fragsize = size;
1614 return (freefrag);
1615}
1616
1617/*
1618 * This workitem de-allocates fragments that were replaced during
1619 * file block allocation.
1620 */
1621STATIC void
1622handle_workitem_freefrag(struct freefrag *freefrag)
1623{
1624 struct inode tip;
1625 struct ufs1_dinode dtip1;
1626
1627 tip.i_vnode = NULL;
1628 tip.i_din1 = &dtip1;
1629 tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
1630 tip.i_ump = VFSTOUFS(freefrag->ff_mnt);
1631 tip.i_dev = freefrag->ff_devvp->v_rdev;
1632 tip.i_number = freefrag->ff_inum;
1633 tip.i_ffs1_uid = freefrag->ff_state & ~ONWORKLIST; /* set above */
1634 ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize);
1635 pool_put(&freefrag_pool, freefrag);
1636}
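
The paired "used below" / "set above" comments mark a small trick worth
spelling out: newfreefrag() stashes the file's uid in the work item's state
word while the freefrag waits on the queue, masking off ONWORKLIST so the one
flag bit the queue code needs stays usable. A minimal sketch of the idea
follows, with hypothetical names, and with the caveat that it only works
while the stored value never collides with the flag bits:

#define ONWORKLIST	0x8000

struct workitem {
	unsigned int state;		/* flag bit + smuggled value */
};

static void
stash_uid(struct workitem *wi, unsigned int uid)
{
	wi->state = uid & ~ONWORKLIST;		/* "used below" */
}

static unsigned int
fetch_uid(const struct workitem *wi)
{
	return (wi->state & ~ONWORKLIST);	/* "set above" */
}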
1637
1638/*
1639 * Indirect block allocation dependencies.
1640 *
1641 * The same dependencies that exist for a direct block also exist when
1642 * a new block is allocated and pointed to by an entry in a block of
1643 * indirect pointers. The undo/redo states described above are also
1644 * used here. Because an indirect block contains many pointers that
1645 * may have dependencies, a second copy of the entire in-memory indirect
1646 * block is kept. The buffer cache copy is always completely up-to-date.
1647 * The second copy, which is used only as a source for disk writes,
1648 * contains only the safe pointers (i.e., those that have no remaining
1649 * update dependencies). The second copy is freed when all pointers
1650 * are safe. The cache is not allowed to replace indirect blocks with
1651 * pending update dependencies. If a buffer containing an indirect
1652 * block with dependencies is written, these routines will mark it
1653 * dirty again. It can only be successfully written once all the
1654 * dependencies are removed. The ffs_fsync routine, in conjunction with
1655 * softdep_sync_metadata, works to get all the dependencies
1656 * removed so that a file can be successfully written to disk. Three
1657 * procedures are used when setting up indirect block pointer
1658 * dependencies. The division is necessary because of the organization
1659 * of the "balloc" routine and because of the distinction between file
1660 * pages and file metadata blocks.
1661 */
1662
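The two-copy scheme described above can be reduced to a few lines. Below is
an illustrative userspace sketch, not the kernel code: the live copy is what
the buffer cache sees, while the safe copy -- the one that would be handed to
the disk driver -- keeps the old pointer for any slot with an unsatisfied
dependency. All names are stand-ins.

#define NPTR	128

struct shadowed_indir {
	long live[NPTR];	/* buffer cache copy, always current */
	long safe[NPTR];	/* disk copy, only dependency-free pointers */
};

/* A slot gains a new pointer that still has an outstanding dependency:
 * the in-core copy advances, the disk copy keeps the old, safe value. */
static void
set_pending(struct shadowed_indir *si, int slot, long newp, long oldp)
{
	si->live[slot] = newp;
	si->safe[slot] = oldp;
}

/* The dependency is satisfied: the new pointer is now safe to write. */
static void
set_complete(struct shadowed_indir *si, int slot)
{
	si->safe[slot] = si->live[slot];
}
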
1663/*
1664 * Allocate a new allocindir structure.
1665 */
1666/* inode for file being extended */
1667/* offset of pointer in indirect block */
1668/* disk block number being added */
1669/* previous block number, 0 if none */
1670STATIC struct allocindir *
1671newallocindir(struct inode *ip, int ptrno, daddr_t newblkno,
1672 daddr_t oldblkno)
1673{
1674 struct allocindir *aip;
1675
1676 aip = pool_get(&allocindir_pool, PR_WAITOK | PR_ZERO);
1677 aip->ai_list.wk_type = D_ALLOCINDIR;
1678 aip->ai_state = ATTACHED;
1679 aip->ai_offset = ptrno;
1680 aip->ai_newblkno = newblkno;
1681 aip->ai_oldblkno = oldblkno;
1682 aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize);
1683 return (aip);
1684}
1685
1686/*
1687 * Called just before setting an indirect block pointer
1688 * to a newly allocated file page.
1689 */
1690/* inode for file being extended */
1691/* allocated block number within file */
1692/* buffer with indirect blk referencing page */
1693/* offset of pointer in indirect block */
1694/* disk block number being added */
1695/* previous block number, 0 if none */
1696/* buffer holding allocated page */
1697void
1698softdep_setup_allocindir_page(struct inode *ip, daddr_t lbn, struct buf *bp,
1699 int ptrno, daddr_t newblkno, daddr_t oldblkno, struct buf *nbp)
1700{
1701 struct allocindir *aip;
1702 struct pagedep *pagedep;
1703
1704 aip = newallocindir(ip, ptrno, newblkno, oldblkno);
1705 ACQUIRE_LOCK(&lk);
1706 /*
1707 * If we are allocating a directory page, then we must
1708 * allocate an associated pagedep to track additions and
1709 * deletions.
1710 */
1711 if ((DIP(ip, mode) & IFMT) == IFDIR &&
1712 pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
1713 WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
1714 if (nbp == NULL) {
1715 /*
1716 * XXXUBC - Yes, I know how to fix this, but not right now.
1717 */
1718 panic("softdep_setup_allocindir_page: Bonk art in the head");
1719 }
1720 WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
1721 FREE_LOCK(&lk);
1722 setup_allocindir_phase2(bp, ip, aip);
1723}
1724
1725/*
1726 * Called just before setting an indirect block pointer to a
1727 * newly allocated indirect block.
1728 */
1729/* newly allocated indirect block */
1730/* inode for file being extended */
1731/* indirect block referencing allocated block */
1732/* offset of pointer in indirect block */
1733/* disk block number being added */
1734void
1735softdep_setup_allocindir_meta(struct buf *nbp, struct inode *ip,
1736 struct buf *bp, int ptrno, daddr_t newblkno)
1737{
1738 struct allocindir *aip;
1739
1740 aip = newallocindir(ip, ptrno, newblkno, 0);
1741 ACQUIRE_LOCK(&lk);
1742 WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
1743 FREE_LOCK(&lk);
1744 setup_allocindir_phase2(bp, ip, aip);
1745}
1746
1747/*
1748 * Called to finish the allocation of the "aip" allocated
1749 * by one of the two routines above.
1750 */
1751/* in-memory copy of the indirect block */
1752/* inode for file being extended */
1753/* allocindir allocated by the above routines */
1754STATIC void
1755setup_allocindir_phase2(struct buf *bp, struct inode *ip,
1756 struct allocindir *aip)
1757{
1758 struct worklist *wk;
1759 struct indirdep *indirdep, *newindirdep;
1760 struct bmsafemap *bmsafemap;
1761 struct allocindir *oldaip;
1762 struct freefrag *freefrag;
1763 struct newblk *newblk;
1764
1765 if (bp->b_lblkno >= 0)
1766 panic("setup_allocindir_phase2: not indir blk");
1767 for (indirdep = NULL, newindirdep = NULL; ; ) {
1768 ACQUIRE_LOCK(&lk);
1769 LIST_FOREACH(wk, &bp->b_dep, wk_list) {
1770 if (wk->wk_type != D_INDIRDEP)
1771 continue;
1772 indirdep = WK_INDIRDEP(wk);
1773 break;
1774 }
1775 if (indirdep == NULL && newindirdep) {
1776 indirdep = newindirdep;
1777 WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
1778 newindirdep = NULL;
1779 }
1780 FREE_LOCK(&lk);
1781 if (indirdep) {
1782 if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
1783 &newblk) == 0)
1784 panic("setup_allocindir: lost block");
1785 ACQUIRE_LOCK(&lk);
1786 if (newblk->nb_state == DEPCOMPLETE) {
1787 aip->ai_state |= DEPCOMPLETE;
1788 aip->ai_buf = NULL;
1789 } else {
1790 bmsafemap = newblk->nb_bmsafemap;
1791 aip->ai_buf = bmsafemap->sm_buf;
1792 LIST_REMOVE(newblk, nb_deps);
1793 LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
1794 aip, ai_deps);
1795 }
1796 LIST_REMOVE(newblk, nb_hash);
1797 pool_put(&newblk_pool, newblk);
1798 aip->ai_indirdep = indirdep;
1799 /*
1800 * Check to see if there is an existing dependency
1801 * for this block. If there is, merge the old
1802 * dependency into the new one.
1803 */
1804 if (aip->ai_oldblkno == 0)
1805 oldaip = NULL;
1806 else
1807
1808 LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
1809 if (oldaip->ai_offset == aip->ai_offset)
1810 break;
1811 freefrag = NULL;
1812 if (oldaip != NULL) {
1813 if (oldaip->ai_newblkno != aip->ai_oldblkno) {
1814 FREE_LOCK(&lk);
1815 panic("setup_allocindir_phase2: blkno");
1816 }
1817 aip->ai_oldblkno = oldaip->ai_oldblkno;
1818 freefrag = aip->ai_freefrag;
1819 aip->ai_freefrag = oldaip->ai_freefrag;
1820 oldaip->ai_freefrag = NULL;
1821 free_allocindir(oldaip, NULL);
1822 }
1823 LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
1824 if (ip->i_ump->um_fstype == UM_UFS1)
1825 ((int32_t *)indirdep->ir_savebp->b_data)
1826 [aip->ai_offset] = aip->ai_oldblkno;
1827 else
1828 ((int64_t *)indirdep->ir_savebp->b_data)
1829 [aip->ai_offset] = aip->ai_oldblkno;
1830 FREE_LOCK(&lk);
1831 if (freefrag != NULL)
1832 handle_workitem_freefrag(freefrag);
1833 }
1834 if (newindirdep) {
1835 if (indirdep->ir_savebp != NULL)
1836 brelse(newindirdep->ir_savebp);
1837 WORKITEM_FREE(newindirdep, D_INDIRDEP);
1838 }
1839 if (indirdep)
1840 break;
1841 newindirdep = pool_get(&indirdep_pool, PR_WAITOK);
1842 newindirdep->ir_list.wk_type = D_INDIRDEP;
1843 newindirdep->ir_state = ATTACHED;
1844 if (ip->i_ump->um_fstype == UM_UFS1)
1845 newindirdep->ir_state |= UFS1FMT;
1846 LIST_INIT(&newindirdep->ir_deplisthd);
1847 LIST_INIT(&newindirdep->ir_donehd);
1848 if (bp->b_blkno == bp->b_lblkno) {
1849 VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno,
1850 NULL);
1851 }
1852 newindirdep->ir_savebp =
1853 getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, INFSLP);
1854#if 0
1855 BUF_KERNPROC(newindirdep->ir_savebp);
1856#endif
1857 memcpy(newindirdep->ir_savebp->b_data, bp->b_data, bp->b_bcount);
1858 }
1859}
1860
1861/*
1862 * Block de-allocation dependencies.
1863 *
1864 * When blocks are de-allocated, the on-disk pointers must be nullified before
1865 * the blocks are made available for use by other files. (The true
1866 * requirement is that old pointers must be nullified before new on-disk
1867 * pointers are set. We chose this slightly more stringent requirement to
1868 * reduce complexity.) Our implementation handles this dependency by updating
1869 * the inode (or indirect block) appropriately but delaying the actual block
1870 * de-allocation (i.e., freemap and free space count manipulation) until
1871 * after the updated versions reach stable storage. After the disk is
1872 * updated, the blocks can be safely de-allocated whenever it is convenient.
1873 * This implementation handles only the common case of reducing a file's
1874 * length to zero. Other cases are handled by the conventional synchronous
1875 * write approach.
1876 *
1877 * The ffs implementation with which we worked double-checks
1878 * the state of the block pointers and file size as it reduces
1879 * a file's length. Some of this code is replicated here in our
1880 * soft updates implementation. The freeblks->fb_chkcnt field is
1881 * used to transfer a part of this information to the procedure
1882 * that eventually de-allocates the blocks.
1883 *
1884 * This routine should be called from the routine that shortens
1885 * a file's length, before the inode's size or block pointers
1886 * are modified. It will save the block pointer information for
1887 * later release and zero the inode so that the calling routine
1888 * can release it.
1889 */
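
The delayed de-allocation protocol above has two distinct steps that must not
be reordered. A hedged userspace sketch follows, with toy types standing in
for the inode and the freeblks work item:

#define NPTRS	12

struct toy_inode { long db[NPTRS]; };
struct toy_freework { long saved[NPTRS]; };

/* Step 1: capture the block pointers and nullify them in the inode.
 * This runs before the inode is written. */
static void
capture_and_zero(struct toy_inode *ip, struct toy_freework *fw)
{
	int i;

	for (i = 0; i < NPTRS; i++) {
		fw->saved[i] = ip->db[i];
		ip->db[i] = 0;
	}
}

/* Step 2: release the captured blocks.  This must run only after the
 * zeroed inode is known to be on stable storage. */
static void
release_blocks(struct toy_freework *fw, void (*blkfree)(long))
{
	int i;

	for (i = 0; i < NPTRS; i++)
		if (fw->saved[i] != 0)
			blkfree(fw->saved[i]);
}
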
1890/* The inode whose length is to be reduced */
1891/* The new length for the file */
1892void
1893softdep_setup_freeblocks(struct inode *ip, off_t length)
1894{
1895 struct freeblks *freeblks;
1896 struct inodedep *inodedep;
1897 struct allocdirect *adp;
1898 struct vnode *vp;
1899 struct buf *bp;
1900 struct fs *fs;
1901 int i, delay, error;
1902
1903 fs = ip->i_fs;
1904 if (length != 0)
1905 panic("softdep_setup_freeblocks: non-zero length");
1906 freeblks = pool_get(&freeblks_pool, PR_WAITOK | PR_ZERO);
1907 freeblks->fb_list.wk_type = D_FREEBLKS;
1908 freeblks->fb_state = ATTACHED;
1909 freeblks->fb_uid = DIP(ip, uid);
1910 freeblks->fb_previousinum = ip->i_number;
1911 freeblks->fb_devvp = ip->i_devvp;
1912 freeblks->fb_mnt = ITOV(ip)->v_mount;
1913 freeblks->fb_oldsize = DIP(ip, size);
1914 freeblks->fb_newsize = length;
1915 freeblks->fb_chkcnt = DIP(ip, blocks);
1916
1917 for (i = 0; i < NDADDR; i++) {
1918 freeblks->fb_dblks[i] = DIP(ip, db[i]);
1919 DIP_ASSIGN(ip, db[i], 0);
1920 }
1921
1922 for (i = 0; i < NIADDR; i++) {
1923 freeblks->fb_iblks[i] = DIP(ip, ib[i]);
1924 DIP_ASSIGN(ip, ib[i], 0);
1925 }
1926
1927 DIP_ASSIGN(ip, blocks, 0);
1928 DIP_ASSIGN(ip, size, 0);
1929
1930 /*
1931 * Push the zero'ed inode to its disk buffer so that we are free
1932 * to delete its dependencies below. Once the dependencies are gone
1933 * the buffer can be safely released.
1934 */
1935 if ((error = bread(ip->i_devvp,
1936 fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
1937 (int)fs->fs_bsize, &bp)) != 0)
1938 softdep_error("softdep_setup_freeblocks", error);
1939
1940 if (ip->i_ump->um_fstype == UM_UFS1)
1941 *((struct ufs1_dinode *) bp->b_data +
1942 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
1943 else
1944 *((struct ufs2_dinode *) bp->b_data +
1945 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
1946
1947 /*
1948 * Find and eliminate any inode dependencies.
1949 */
1950 ACQUIRE_LOCK(&lk);
1951 (void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
1952 if ((inodedep->id_state & IOSTARTED) != 0) {
1953 FREE_LOCK(&lk);
1954 panic("softdep_setup_freeblocks: inode busy");
1955 }
1956 /*
1957 * Add the freeblks structure to the list of operations that
1958 * must await the zero'ed inode being written to disk. If we
1959 * still have a bitmap dependency (delay == 0), then the inode
1960 * has never been written to disk, so we can process the
1961 * freeblks below once we have deleted the dependencies.
1962 */
1963 delay = (inodedep->id_state & DEPCOMPLETE);
1964 if (delay)
1965 WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
1966 /*
1967 * Because the file length has been truncated to zero, any
1968 * pending block allocation dependency structures associated
1969 * with this inode are obsolete and can simply be de-allocated.
1970 * We must first merge the two dependency lists to get rid of
1971 * any duplicate freefrag structures, then purge the merged list.
1972 * If we still have a bitmap dependency, then the inode has never
1973 * been written to disk, so we can free any fragments without delay.
1974 */
1975 merge_inode_lists(inodedep);
1976 while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
1977 free_allocdirect(&inodedep->id_inoupdt, adp, delay);
1978 FREE_LOCK(&lk);
1979 bdwrite(bp);
1980 /*
1981 * We must wait for any I/O in progress to finish so that
1982 * all potential buffers on the dirty list will be visible.
1983 * Once they are all there, walk the list and get rid of
1984 * any dependencies.
1985 */
1986 vp = ITOV(ip);
1987 ACQUIRE_LOCK(&lk);
1988 drain_output(vp, 1);
1989 while ((bp = LIST_FIRST(&vp->v_dirtyblkhd))) {
1990 if (getdirtybuf(bp, MNT_WAIT) <= 0)
1991 break;
1992 (void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
1993 deallocate_dependencies(bp, inodedep);
1994 bp->b_flags |= B_INVAL | B_NOCACHE;
1995 FREE_LOCK(&lk);
1996 brelse(bp);
1997 ACQUIRE_LOCK(&lk);
1998 }
1999 if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
2000 (void) free_inodedep(inodedep);
2001
2002 if (delay) {
2003 freeblks->fb_state |= DEPCOMPLETE;
2004 /*
2005 * If the inode with zeroed block pointers is now on disk we
2006 * can start freeing blocks. Add freeblks to the worklist
2007 * instead of calling handle_workitem_freeblocks() directly as
2008 * it is more likely that additional IO is needed to complete
2009 * the request than in the !delay case.
2010 */
2011 if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
2012 add_to_worklist(&freeblks->fb_list);
2013 }
2014
2015 FREE_LOCK(&lk);
2016 /*
2017 * If the inode has never been written to disk (delay == 0),
2018 * then we can process the freeblks now that we have deleted
2019 * the dependencies.
2020 */
2021 if (!delay)
2022 handle_workitem_freeblocks(freeblks);
2023}
2024
2025/*
2026 * Reclaim any dependency structures from a buffer that is about to
2027 * be reallocated to a new vnode. The buffer must be locked, thus,
2028 * no I/O completion operations can occur while we are manipulating
2029 * its associated dependencies. The mutex is held so that other I/O's
2030 * associated with related dependencies do not occur.
2031 */
2032STATIC void
2033deallocate_dependencies(struct buf *bp, struct inodedep *inodedep)
2034{
2035 struct worklist *wk;
2036 struct indirdep *indirdep;
2037 struct allocindir *aip;
2038 struct pagedep *pagedep;
2039 struct dirrem *dirrem;
2040 struct diradd *dap;
2041 int i;
2042
2043 while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
2044 switch (wk->wk_type) {
2045
2046 case D_INDIRDEP:
2047 indirdep = WK_INDIRDEP(wk);
2048 /*
2049 * None of the indirect pointers will ever be visible,
2050 * so they can simply be tossed. GOINGAWAY ensures
2051 * that allocated pointers will be saved in the buffer
2052 * cache until they are freed. Note that they will
2053 * only be able to be found by their physical address
2054 * since the inode mapping the logical address will
2055 * be gone. The save buffer used for the safe copy
2056 * was allocated in setup_allocindir_phase2 using
2057 * the physical address so it could be used for this
2058 * purpose. Hence we swap the safe copy with the real
2059 * copy, allowing the safe copy to be freed and holding
2060 * on to the real copy for later use in indir_trunc.
2061 */
2062 if (indirdep->ir_state & GOINGAWAY) {
2063 FREE_LOCK(&lk);
2064 panic("deallocate_dependencies: already gone");
2065 }
2066 indirdep->ir_state |= GOINGAWAY;
2067 while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)))
2068 free_allocindir(aip, inodedep);
2069 if (bp->b_lblkno >= 0 ||
2070 bp->b_blkno != indirdep->ir_savebp->b_lblkno) {
2071 FREE_LOCK(&lk);
2072 panic("deallocate_dependencies: not indir");
2073 }
2074 memcpy(indirdep->ir_savebp->b_data, bp->b_data,
2075 bp->b_bcount);
2076 WORKLIST_REMOVE(wk);
2077 WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk);
2078 continue;
2079
2080 case D_PAGEDEP:
2081 pagedep = WK_PAGEDEP(wk);
2082 /*
2083 * None of the directory additions will ever be
2084 * visible, so they can simply be tossed.
2085 */
2086 for (i = 0; i < DAHASHSZ; i++)
2087 while ((dap =
2088 LIST_FIRST(&pagedep->pd_diraddhd[i])))
2089 free_diradd(dap);
2090 while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)))
2091 free_diradd(dap);
2092 /*
2093 * Copy any directory remove dependencies to the list
2094 * to be processed after the zero'ed inode is written.
2095 * If the inode has already been written, then they
2096 * can be dumped directly onto the work list.
2097 */
2098 while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd))) {
2099 LIST_REMOVE(dirrem, dm_next);
2100 dirrem->dm_dirinum = pagedep->pd_ino;
2101 if (inodedep == NULL ||
2102 (inodedep->id_state & ALLCOMPLETE) ==
2103 ALLCOMPLETE)
2104 add_to_worklist(&dirrem->dm_list);
2105 else
2106 WORKLIST_INSERT(&inodedep->id_bufwait,
2107 &dirrem->dm_list);
2108 }
2109 if ((pagedep->pd_state & NEWBLOCK) != 0) {
2110 LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list)
2111 if (wk->wk_type == D_NEWDIRBLK &&
2112 WK_NEWDIRBLK(wk)->db_pagedep ==
2113 pagedep)
2114 break;
2115 if (wk != NULL) {
2116 WORKLIST_REMOVE(wk);
2117 free_newdirblk(WK_NEWDIRBLK(wk));
2118 } else {
2119 FREE_LOCK(&lk);
2120 panic("deallocate_dependencies: "
2121 "lost pagedep");
2122 }
2123 }
2124 WORKLIST_REMOVE(&pagedep->pd_list);
2125 LIST_REMOVE(pagedep, pd_hash);
2126 WORKITEM_FREE(pagedep, D_PAGEDEP);
2127 continue;
2128
2129 case D_ALLOCINDIR:
2130 free_allocindir(WK_ALLOCINDIR(wk), inodedep);
2131 continue;
2132
2133 case D_ALLOCDIRECT:
2134 case D_INODEDEP:
2135 FREE_LOCK(&lk);
2136 panic("deallocate_dependencies: Unexpected type %s",
2137 TYPENAME(wk->wk_type));
2138 /* NOTREACHED */
2139
2140 default:
2141 FREE_LOCK(&lk);
2142 panic("deallocate_dependencies: Unknown type %s",
2143 TYPENAME(wk->wk_type));
2144 /* NOTREACHED */
2145 }
2146 }
2147}
2148
2149/*
2150 * Free an allocdirect. Generate a new freefrag work request if appropriate.
2151 * This routine must be called with splbio interrupts blocked.
2152 */
2153STATIC void
2154free_allocdirect(struct allocdirectlst *adphead, struct allocdirect *adp,
2155 int delay)
2156{
2157 struct newdirblk *newdirblk;
2158 struct worklist *wk;
2159
2160 splassert(IPL_BIO);
2161
2162#ifdef DEBUG
2163 if (lk.lkt_held == -1)
2164 panic("free_allocdirect: lock not held");
2165#endif
2166 if ((adp->ad_state & DEPCOMPLETE) == 0)
2167 LIST_REMOVE(adp, ad_deps);
2168 TAILQ_REMOVE(adphead, adp, ad_next);
2169 if ((adp->ad_state & COMPLETE) == 0)
2170 WORKLIST_REMOVE(&adp->ad_list);
2171 if (adp->ad_freefrag != NULL) {
2172 if (delay)
2173 WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
2174 &adp->ad_freefrag->ff_list);
2175 else
2176 add_to_worklist(&adp->ad_freefrag->ff_list);
2177 }
2178 if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) {
2179 newdirblk = WK_NEWDIRBLK(wk);
2180 WORKLIST_REMOVE(&newdirblk->db_list);
2181 if (LIST_FIRST(&adp->ad_newdirblk) != NULL)
2182 panic("free_allocdirect: extra newdirblk");
2183 if (delay)
2184 WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
2185 &newdirblk->db_list);
2186 else
2187 free_newdirblk(newdirblk);
2188 }
2189 WORKITEM_FREE(adp, D_ALLOCDIRECT);
2190}
2191
2192/*
2193 * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
2194 * This routine must be called with splbio interrupts blocked.
2195 */
2196void
2197free_newdirblk(struct newdirblk *newdirblk)
2198{
2199 struct pagedep *pagedep;
2200 struct diradd *dap;
2201 int i;
2202
2203 splassert(IPL_BIO);
2204
2205#ifdef DEBUG
2206 if (lk.lkt_held == -1)
2207 panic("free_newdirblk: lock not held");
2208#endif
2209 /*
2210 * If the pagedep is still linked onto the directory buffer
2211 * dependency chain, then some of the entries on the
2212 * pd_pendinghd list may not be committed to disk yet. In
2213 * this case, we will simply clear the NEWBLOCK flag and
2214 * let the pd_pendinghd list be processed when the pagedep
2215 * is next written. If the pagedep is no longer on the buffer
2216 * dependency chain, then all the entries on the pd_pending
2217 * list are committed to disk and we can free them here.
2218 */
2219 pagedep = newdirblk->db_pagedep;
2220 pagedep->pd_state &= ~NEWBLOCK;
2221 if ((pagedep->pd_state & ONWORKLIST) == 0)
2222 while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
2223 free_diradd(dap);
2224 /*
2225 * If no dependencies remain, the pagedep will be freed.
2226 */
2227 for (i = 0; i < DAHASHSZ; i++)
2228 if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
2229 break;
2230 if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) {
2231 LIST_REMOVE(pagedep, pd_hash);
2232 WORKITEM_FREE(pagedep, D_PAGEDEP);
2233 }
2234 WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
2235}
2236
2237/*
2238 * Prepare an inode to be freed. The actual free operation is not
2239 * done until the zero'ed inode has been written to disk.
2240 */
2241void
2242softdep_freefile(struct vnode *pvp, ufsino_t ino, mode_t mode)
2243{
2244 struct inode *ip = VTOI(pvp);
2245 struct inodedep *inodedep;
2246 struct freefile *freefile;
2247
2248 /*
2249 * This sets up the inode de-allocation dependency.
2250 */
2251 freefile = pool_get(&freefile_pool, PR_WAITOK);
2252 freefile->fx_list.wk_type = D_FREEFILE;
2253 freefile->fx_list.wk_state = 0;
2254 freefile->fx_mode = mode;
2255 freefile->fx_oldinum = ino;
2256 freefile->fx_devvp = ip->i_devvp;
2257 freefile->fx_mnt = ITOV(ip)->v_mount;
2258
2259 /*
2260 * If the inodedep does not exist, then the zero'ed inode has
2261 * been written to disk. If the allocated inode has never been
2262 * written to disk, then the on-disk inode is zero'ed. In either
2263 * case we can free the file immediately.
2264 */
2265 ACQUIRE_LOCK(&lk);
2266 if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 ||
2267 check_inode_unwritten(inodedep)) {
2268 FREE_LOCK(&lk);
2269 handle_workitem_freefile(freefile);
2270 return;
2271 }
2272 WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
2273 FREE_LOCK(&lk);
2274}
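/*
 * [Editor's note, not part of the source] Callers such as ffs_vfree()
 * are expected to route inode frees through softdep_freefile() when
 * soft updates are enabled, rather than calling ffs_freefile()
 * directly; the real free happens in handle_workitem_freefile()
 * below once the zero'ed inode is known to be on disk.
 */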
2275
2276 /*
2277  * Check to see if an inode has never been written to disk. If
2278  * so, free the inodedep and return success; otherwise return failure.
2279  * This routine must be called with splbio interrupts blocked.
2280  *
2281  * If we still have a bitmap dependency, then the inode has never
2282  * been written to disk. Drop the dependency as it is no longer
2283  * necessary since the inode is being deallocated. We set the
2284  * ALLCOMPLETE flags since the bitmap now properly shows that the
2285  * inode is not allocated. Even if the inode is actively being
2286  * written, it has been rolled back to its zero'ed state, so we
2287  * are ensured that a zero inode is what is on the disk. For short
2288  * lived files, this change will usually result in removing all the
2289  * dependencies from the inode so that it can be freed immediately.
2290  */
2291 STATIC int
2292 check_inode_unwritten(struct inodedep *inodedep)
2293 {
2294 	splassert(IPL_BIO);
2295 
2296 	if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
2297 	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
2298 	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
2299 	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
2300 	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
2301 	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
2302 	    inodedep->id_nlinkdelta != 0)
2303 		return (0);
2304 	inodedep->id_state |= ALLCOMPLETE;
2305 	LIST_REMOVE(inodedep, id_deps);
2306 	inodedep->id_buf = NULL;
2307 	if (inodedep->id_state & ONWORKLIST)
2308 		WORKLIST_REMOVE(&inodedep->id_list);
2309 	if (inodedep->id_savedino1 != NULL) {
2310 		free(inodedep->id_savedino1, M_INODEDEP, inodedep->id_unsize);
2311 		inodedep->id_savedino1 = NULL;
2312 	}
2313 	if (free_inodedep(inodedep) == 0) {
2314 		FREE_LOCK(&lk);
2315 		panic("check_inode_unwritten: busy inode");
2316 	}
2317 	return (1);
2318 }
2319
2320 /*
2321  * Try to free an inodedep structure. Return 1 if it could be freed.
2322  */
2323 STATIC int
2324 free_inodedep(struct inodedep *inodedep)
2325 {
2326 
2327 	if ((inodedep->id_state & ONWORKLIST) != 0 ||
2328 	    (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
2329 	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
2330 	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
2331 	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
2332 	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
2333 	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
2334 	    inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
2335 		return (0);
2336 	LIST_REMOVE(inodedep, id_hash);
2337 	WORKITEM_FREE(inodedep, D_INODEDEP);
2338 	num_inodedep -= 1;
2339 	return (1);
2340 }
2341
2342 /*
2343  * This workitem routine performs the block de-allocation.
2344  * The workitem is added to the pending list after the updated
2345  * inode block has been written to disk. As mentioned above,
2346  * checks regarding the number of blocks de-allocated (compared
2347  * to the number of blocks allocated for the file) are also
2348  * performed in this function.
2349  */
2350 STATIC void
2351 handle_workitem_freeblocks(struct freeblks *freeblks)
2352 {
2353 	struct inode tip;
2354 	daddr_t bn;
2355 	union {
2356 		struct ufs1_dinode di1;
2357 		struct ufs2_dinode di2;
2358 	} di;
2359 	struct fs *fs;
2360 	int i, level, bsize;
2361 	long nblocks, blocksreleased = 0;
2362 	int error, allerror = 0;
2363 	daddr_t baselbns[NIADDR], tmpval;
2364 
2365 	if (VFSTOUFS(freeblks->fb_mnt)->um_fstype == UM_UFS1)
2366 		tip.i_din1 = &di.di1;
2367 	else
2368 		tip.i_din2 = &di.di2;
2369 
2370 	tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
2371 	tip.i_number = freeblks->fb_previousinum;
2372 	tip.i_ump = VFSTOUFS(freeblks->fb_mnt);
2373 	tip.i_dev = freeblks->fb_devvp->v_rdev;
2374 	DIP_ASSIGN(&tip, size, freeblks->fb_oldsize);
2375 	DIP_ASSIGN(&tip, uid, freeblks->fb_uid);
2376 	tip.i_vnode = NULL;
2377 	tmpval = 1;
2378 	baselbns[0] = NDADDR;
2379 	for (i = 1; i < NIADDR; i++) {
2380 		tmpval *= NINDIR(fs);
2381 		baselbns[i] = baselbns[i - 1] + tmpval;
2382 	}
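	/*
	 * [Editor's example, not part of the source] baselbns[] holds the
	 * first logical block number covered by each level of indirection.
	 * For instance, with NINDIR(fs) == 2048 pointers per indirect
	 * block, the loop above yields baselbns[0] = 12 (NDADDR),
	 * baselbns[1] = 12 + 2048, and baselbns[2] = 12 + 2048 + 2048*2048.
	 */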
2383 	nblocks = btodb(fs->fs_bsize);
2384 	blocksreleased = 0;
2385 	/*
2386 	 * Indirect blocks first.
2387 	 */
2388 	for (level = (NIADDR - 1); level >= 0; level--) {
2389 		if ((bn = freeblks->fb_iblks[level]) == 0)
2390 			continue;
2391 		if ((error = indir_trunc(&tip, fsbtodb(fs, bn), level,
2392 		    baselbns[level], &blocksreleased)) != 0)
2393 			allerror = error;
2394 		ffs_blkfree(&tip, bn, fs->fs_bsize);
2395 		blocksreleased += nblocks;
2396 	}
2397 	/*
2398 	 * All direct blocks or frags.
2399 	 */
2400 	for (i = (NDADDR - 1); i >= 0; i--) {
2401 		if ((bn = freeblks->fb_dblks[i]) == 0)
2402 			continue;
2403 		bsize = blksize(fs, &tip, i);
2404 		ffs_blkfree(&tip, bn, bsize);
2405 		blocksreleased += btodb(bsize);
2406 	}
2407 
2408 #ifdef DIAGNOSTIC
2409 	if (freeblks->fb_chkcnt != blocksreleased)
2410 		printf("handle_workitem_freeblocks: block count\n");
2411 	if (allerror)
2412 		softdep_error("handle_workitem_freeblks", allerror);
2413 #endif /* DIAGNOSTIC */
2414 	WORKITEM_FREE(freeblks, D_FREEBLKS);
2415 }
2416
2417 /*
2418  * Release blocks associated with the inode ip and stored in the indirect
2419  * block dbn. If level is greater than SINGLE, the block is an indirect block
2420  * and recursive calls to indir_trunc must be used to cleanse other indirect
2421  * blocks.
2422  */
2423 STATIC int
2424 indir_trunc(struct inode *ip, daddr_t dbn, int level, daddr_t lbn,
2425     long *countp)
2426 {
2427 	struct buf *bp;
2428 	int32_t *bap1 = NULL;
2429 	int64_t nb, *bap2 = NULL;
2430 	struct fs *fs;
2431 	struct worklist *wk;
2432 	struct indirdep *indirdep;
2433 	int i, lbnadd, nblocks, ufs1fmt;
2434 	int error, allerror = 0;
2435 
2436 	fs = ip->i_fs;
2437 	lbnadd = 1;
2438 	for (i = level; i > 0; i--)
2439 		lbnadd *= NINDIR(fs);
2440 	/*
2441 	 * Get buffer of block pointers to be freed. This routine is not
2442 	 * called until the zero'ed inode has been written, so it is safe
2443 	 * to free blocks as they are encountered. Because the inode has
2444 	 * been zero'ed, calls to bmap on these blocks will fail. So, we
2445 	 * have to use the on-disk address and the block device for the
2446 	 * filesystem to look them up. If the file was deleted before its
2447 	 * indirect blocks were all written to disk, the routine that set
2448 	 * us up (deallocate_dependencies) will have arranged to leave
2449 	 * a complete copy of the indirect block in memory for our use.
2450 	 * Otherwise we have to read the blocks in from the disk.
2451 	 */
2452 	ACQUIRE_LOCK(&lk);
2453 	if ((bp = incore(ip->i_devvp, dbn)) != NULL &&
2454 	    (wk = LIST_FIRST(&bp->b_dep)) != NULL) {
2455 		if (wk->wk_type != D_INDIRDEP ||
2456 		    (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp ||
2457 		    (indirdep->ir_state & GOINGAWAY) == 0) {
2458 			FREE_LOCK(&lk);
2459 			panic("indir_trunc: lost indirdep");
2460 		}
2461 		WORKLIST_REMOVE(wk);
2462 		WORKITEM_FREE(indirdep, D_INDIRDEP);
2463 		if (LIST_FIRST(&bp->b_dep) != NULL) {
2464 			FREE_LOCK(&lk);
2465 			panic("indir_trunc: dangling dep");
2466 		}
2467 		FREE_LOCK(&lk);
2468 	} else {
2469 		FREE_LOCK(&lk);
2470 		error = bread(ip->i_devvp, dbn, (int)fs->fs_bsize, &bp);
2471 		if (error)
2472 			return (error);
2473 	}
2474 	/*
2475 	 * Recursively free indirect blocks.
2476 	 */
2477 	if (ip->i_ump->um_fstype == UM_UFS1) {
2478 		ufs1fmt = 1;
2479 		bap1 = (int32_t *)bp->b_data;
2480 	} else {
2481 		ufs1fmt = 0;
2482 		bap2 = (int64_t *)bp->b_data;
2483 	}
2484 	nblocks = btodb(fs->fs_bsize);
2485 	for (i = NINDIR(fs) - 1; i >= 0; i--) {
2486 		if (ufs1fmt)
2487 			nb = bap1[i];
2488 		else
2489 			nb = bap2[i];
2490 		if (nb == 0)
2491 			continue;
2492 		if (level != 0) {
2493 			if ((error = indir_trunc(ip, fsbtodb(fs, nb),
2494 			    level - 1, lbn + (i * lbnadd), countp)) != 0)
2495 				allerror = error;
2496 		}
2497 		ffs_blkfree(ip, nb, fs->fs_bsize);
2498 		*countp += nblocks;
2499 	}
2500 	bp->b_flags |= B_INVAL | B_NOCACHE;
2501 	brelse(bp);
2502 	return (allerror);
2503 }
2504
2505 /*
2506  * Free an allocindir.
2507  * This routine must be called with splbio interrupts blocked.
2508  */
2509 STATIC void
2510 free_allocindir(struct allocindir *aip, struct inodedep *inodedep)
2511 {
2512 	struct freefrag *freefrag;
2513 
2514 	splassert(IPL_BIO);
2515 
2516 #ifdef DEBUG
2517 	if (lk.lkt_held == -1)
2518 		panic("free_allocindir: lock not held");
2519 #endif
2520 	if ((aip->ai_state & DEPCOMPLETE) == 0)
2521 		LIST_REMOVE(aip, ai_deps);
2522 	if (aip->ai_state & ONWORKLIST)
2523 		WORKLIST_REMOVE(&aip->ai_list);
2524 	LIST_REMOVE(aip, ai_next);
2525 	if ((freefrag = aip->ai_freefrag) != NULL) {
2526 		if (inodedep == NULL)
2527 			add_to_worklist(&freefrag->ff_list);
2528 		else
2529 			WORKLIST_INSERT(&inodedep->id_bufwait,
2530 			    &freefrag->ff_list);
2531 	}
2532 	WORKITEM_FREE(aip, D_ALLOCINDIR);
2533 }
2534
2535 /*
2536 * Directory entry addition dependencies.
2537 *
2538 * When adding a new directory entry, the inode (with its incremented link
2539 * count) must be written to disk before the directory entry's pointer to it.
2540 * Also, if the inode is newly allocated, the corresponding freemap must be
2541 * updated (on disk) before the directory entry's pointer. These requirements
2542 * are met via undo/redo on the directory entry's pointer, which consists
2543 * simply of the inode number.
2544 *
2545 * As directory entries are added and deleted, the free space within a
2546 * directory block can become fragmented. The ufs file system will compact
2547 * a fragmented directory block to make space for a new entry. When this
2548 * occurs, the offsets of previously added entries change. Any "diradd"
2549 * dependency structures corresponding to these entries must be updated with
2550 * the new offsets.
2551 */
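/*
 * [Editor's sketch, not part of the source] With "ep" pointing at the
 * new entry in the directory buffer, the undo/redo amounts to:
 *
 *	ep->d_ino = 0;			undo before the block is written
 *	... directory block reaches disk ...
 *	... inode (and freemap) reach disk ...
 *	ep->d_ino = dap->da_newinum;	redo once the pointer is safe
 *
 * The undo half is initiate_write_filepage() below; the redo half is
 * performed in the write-completion path.
 */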
2552
2553 /*
2554 * This routine is called after the in-memory inode's link
2555 * count has been incremented, but before the directory entry's
2556 * pointer to the inode has been set.
2557 */
2558 /* buffer containing directory block */
2559 /* inode for directory */
2560 /* offset of new entry in directory */
2561 /* inode referenced by new directory entry */
2562 /* non-NULL => contents of new mkdir */
2563 /* entry is in a newly allocated block */
2564 int
2565 softdep_setup_directory_add(struct buf *bp, struct inode *dp, off_t diroffset,
2566     long newinum, struct buf *newdirbp, int isnewblk)
2567 {
2568 	int offset;		/* offset of new entry within directory block */
2569 	daddr_t lbn;		/* block in directory containing new entry */
2570 	struct fs *fs;
2571 	struct diradd *dap;
2572 	struct allocdirect *adp;
2573 	struct pagedep *pagedep;
2574 	struct inodedep *inodedep;
2575 	struct newdirblk *newdirblk = NULL;
2576 	struct mkdir *mkdir1, *mkdir2;
2577 
2578 
2579 	fs = dp->i_fs;
2580 	lbn = lblkno(fs, diroffset);
2581 	offset = blkoff(fs, diroffset);
2582 	dap = pool_get(&diradd_pool, PR_WAITOK | PR_ZERO);
2583 	dap->da_list.wk_type = D_DIRADD;
2584 	dap->da_offset = offset;
2585 	dap->da_newinum = newinum;
2586 	dap->da_state = ATTACHED;
2587 	if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) {
2588 		newdirblk = pool_get(&newdirblk_pool, PR_WAITOK);
2589 		newdirblk->db_list.wk_type = D_NEWDIRBLK;
2590 		newdirblk->db_state = 0;
2591 	}
2592 	if (newdirbp == NULL) {
2593 		dap->da_state |= DEPCOMPLETE;
2594 		ACQUIRE_LOCK(&lk);
2595 	} else {
2596 		dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
2597 		mkdir1 = pool_get(&mkdir_pool, PR_WAITOK);
2598 		mkdir1->md_list.wk_type = D_MKDIR;
2599 		mkdir1->md_state = MKDIR_BODY;
2600 		mkdir1->md_diradd = dap;
2601 		mkdir2 = pool_get(&mkdir_pool, PR_WAITOK);
2602 		mkdir2->md_list.wk_type = D_MKDIR;
2603 		mkdir2->md_state = MKDIR_PARENT;
2604 		mkdir2->md_diradd = dap;
2605 		/*
2606 		 * Dependency on "." and ".." being written to disk.
2607 		 */
2608 		mkdir1->md_buf = newdirbp;
2609 		ACQUIRE_LOCK(&lk);
2610 		LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
2611 		WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list);
2612 		FREE_LOCK(&lk);
2613 		bdwrite(newdirbp);
2614 		/*
2615 		 * Dependency on link count increase for parent directory
2616 		 */
2617 		ACQUIRE_LOCK(&lk);
2618 		if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0
2619 		    || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
2620 			dap->da_state &= ~MKDIR_PARENT;
2621 			WORKITEM_FREE(mkdir2, D_MKDIR);
2622 		} else {
2623 			LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
2624 			WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list);
2625 		}
2626 	}
2627 	/*
2628 	 * Link into parent directory pagedep to await its being written.
2629 	 */
2630 	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
2631 		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
2632 	dap->da_pagedep = pagedep;
2633 	LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
2634 	    da_pdlist);
2635 	/*
2636 	 * Link into its inodedep. Put it on the id_bufwait list if the inode
2637 	 * is not yet written. If it is written, do the post-inode write
2638 	 * processing to put it on the id_pendinghd list.
2639 	 */
2640 	(void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep);
2641 	if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
2642 		diradd_inode_written(dap, inodedep);
2643 	else
2644 		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
2645 	if (isnewblk) {
2646 		/*
2647 		 * Directories growing into indirect blocks are rare
2648 		 * enough, and new block allocation in those cases
2649 		 * rarer still, that we choose not to bother tracking
2650 		 * them. Rather we simply force the new directory
2651 		 * entry to disk.
2652 		 */
2653 		if (lbn >= NDADDR) {
2654 			FREE_LOCK(&lk);
2655 			/*
2656 			 * We only have a new allocation when at the
2657 			 * beginning of a new block, not when we are
2658 			 * expanding into an existing block.
2659 			 */
2660 			if (blkoff(fs, diroffset) == 0)
2661 				return (1);
2662 			return (0);
2663 		}
2664 		/*
2665 		 * We only have a new allocation when at the beginning
2666 		 * of a new fragment, not when we are expanding into an
2667 		 * existing fragment. Also, there is nothing to do if we
2668 		 * are already tracking this block.
2669 		 */
2670 		if (fragoff(fs, diroffset) != 0) {
2671 			FREE_LOCK(&lk);
2672 			return (0);
2673 		}
2674 
2675 		if ((pagedep->pd_state & NEWBLOCK) != 0) {
2676 			WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
2677 			FREE_LOCK(&lk);
2678 			return (0);
2679 		}
2680 		/*
2681 		 * Find our associated allocdirect and have it track us.
2682 		 */
2683 		if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0)
2684 			panic("softdep_setup_directory_add: lost inodedep");
2685 		adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst);
2686 		if (adp == NULL || adp->ad_lbn != lbn) {
2687 			FREE_LOCK(&lk);
2688 			panic("softdep_setup_directory_add: lost entry");
2689 		}
2690 		pagedep->pd_state |= NEWBLOCK;
2691 		newdirblk->db_pagedep = pagedep;
2692 		WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list);
2693 	}
2694 	FREE_LOCK(&lk);
2695 	return (0);
2696 }
2697
2698 /*
2699  * This procedure is called to change the offset of a directory
2700  * entry when compacting a directory block which must be owned
2701  * exclusively by the caller. Note that the actual entry movement
2702  * must be done in this procedure to ensure that no I/O completions
2703  * occur while the move is in progress.
2704  */
2705 /* inode for directory */
2706 /* address of dp->i_offset */
2707 /* address of old directory location */
2708 /* address of new directory location */
2709 /* size of directory entry */
2710 void
2711 softdep_change_directoryentry_offset(struct inode *dp, caddr_t base,
2712     caddr_t oldloc, caddr_t newloc, int entrysize)
2713 {
2714 	int offset, oldoffset, newoffset;
2715 	struct pagedep *pagedep;
2716 	struct diradd *dap;
2717 	daddr_t lbn;
2718 
2719 	ACQUIRE_LOCK(&lk);
2720 	lbn = lblkno(dp->i_fs, dp->i_offset);
2721 	offset = blkoff(dp->i_fs, dp->i_offset);
2722 	if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0)
2723 		goto done;
2724 	oldoffset = offset + (oldloc - base);
2725 	newoffset = offset + (newloc - base);
2726 
2727 	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) {
2728 		if (dap->da_offset != oldoffset)
2729 			continue;
2730 		dap->da_offset = newoffset;
2731 		if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset))
2732 			break;
2733 		LIST_REMOVE(dap, da_pdlist);
2734 		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)],
2735 		    dap, da_pdlist);
2736 		break;
2737 	}
2738 	if (dap == NULL) {
2739 
2740 		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) {
2741 			if (dap->da_offset == oldoffset) {
2742 				dap->da_offset = newoffset;
2743 				break;
2744 			}
2745 		}
2746 	}
2747 done:
2748 	memmove(newloc, oldloc, entrysize);
2749 	FREE_LOCK(&lk);
2750 }
2751
2752 /*
2753  * Free a diradd dependency structure. This routine must be called
2754  * with splbio interrupts blocked.
2755  */
2756 STATIC void
2757 free_diradd(struct diradd *dap)
2758 {
2759 	struct dirrem *dirrem;
2760 	struct pagedep *pagedep;
2761 	struct inodedep *inodedep;
2762 	struct mkdir *mkdir, *nextmd;
2763 
2764 	splassert(IPL_BIO);
2765 
2766 #ifdef DEBUG
2767 	if (lk.lkt_held == -1)
2768 		panic("free_diradd: lock not held");
2769 #endif
2770 	WORKLIST_REMOVE(&dap->da_list);
2771 	LIST_REMOVE(dap, da_pdlist);
2772 	if ((dap->da_state & DIRCHG) == 0) {
2773 		pagedep = dap->da_pagedep;
2774 	} else {
2775 		dirrem = dap->da_previous;
2776 		pagedep = dirrem->dm_pagedep;
2777 		dirrem->dm_dirinum = pagedep->pd_ino;
2778 		add_to_worklist(&dirrem->dm_list);
2779 	}
2780 	if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum,
2781 	    0, &inodedep) != 0)
2782 		(void) free_inodedep(inodedep);
2783 	if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
2784 		for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
2785 			nextmd = LIST_NEXT(mkdir, md_mkdirs);
2786 			if (mkdir->md_diradd != dap)
2787 				continue;
2788 			dap->da_state &= ~mkdir->md_state;
2789 			WORKLIST_REMOVE(&mkdir->md_list);
2790 			LIST_REMOVE(mkdir, md_mkdirs);
2791 			WORKITEM_FREE(mkdir, D_MKDIR);
2792 		}
2793 		if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
2794 			FREE_LOCK(&lk);
2795 			panic("free_diradd: unfound ref");
2796 		}
2797 	}
2798 	WORKITEM_FREE(dap, D_DIRADD);
2799 }
2800
2801 /*
2802 * Directory entry removal dependencies.
2803 *
2804 * When removing a directory entry, the entry's inode pointer must be
2805 * zero'ed on disk before the corresponding inode's link count is decremented
2806 * (possibly freeing the inode for re-use). This dependency is handled by
2807 * updating the directory entry but delaying the inode count reduction until
2808 * after the directory block has been written to disk. After this point, the
2809 * inode count can be decremented whenever it is convenient.
2810 */
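/*
 * [Editor's sketch, not part of the source] For a removal the enforced
 * ordering is roughly:
 *
 *	ep->d_ino = 0;  ... directory block written ...
 *	DIP_ADD(ip, nlink, -1);  ... inode written ...
 *	ffs_blkfree()/ffs_freefile() once the zero'ed inode is on disk
 *
 * so a crash can at worst leak an inode or blocks, but never leave a
 * directory entry pointing at a freed (or reused) inode.
 */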
2811
2812 /*
2813  * This routine should be called immediately after removing
2814  * a directory entry. The inode's link count should not be
2815  * decremented by the calling procedure -- the soft updates
2816  * code will do this task when it is safe.
2817  */
2818 /* buffer containing directory block */
2819 /* inode for the directory being modified */
2820 /* inode for directory entry being removed */
2821 /* indicates if doing RMDIR */
2822 void
2823 softdep_setup_remove(struct buf *bp, struct inode *dp, struct inode *ip,
2824     int isrmdir)
2825 {
2826 	struct dirrem *dirrem, *prevdirrem;
2827 
2828 	/*
2829 	 * Allocate a new dirrem if appropriate and ACQUIRE_LOCK.
2830 	 */
2831 	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
2832 
2833 	/*
2834 	 * If the COMPLETE flag is clear, then there were no active
2835 	 * entries and we want to roll back to a zeroed entry until
2836 	 * the new inode is committed to disk. If the COMPLETE flag is
2837 	 * set then we have deleted an entry that never made it to
2838 	 * disk. If the entry we deleted resulted from a name change,
2839 	 * then the old name still resides on disk. We cannot delete
2840 	 * its inode (returned to us in prevdirrem) until the zeroed
2841 	 * directory entry gets to disk. The new inode has never been
2842 	 * referenced on the disk, so can be deleted immediately.
2843 	 */
2844 	if ((dirrem->dm_state & COMPLETE) == 0) {
2845 		LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
2846 		    dm_next);
2847 		FREE_LOCK(&lk);
2848 	} else {
2849 		if (prevdirrem != NULL)
2850 			LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
2851 			    prevdirrem, dm_next);
2852 		dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
2853 		FREE_LOCK(&lk);
2854 		handle_workitem_remove(dirrem);
2855 	}
2856 }
2857
2858 STATIC long num_dirrem;		/* number of dirrem allocated */
2859 /*
2860  * Allocate a new dirrem if appropriate and return it along with
2861  * its associated pagedep. Called without a lock, returns with lock.
2862  */
2863 /* buffer containing directory block */
2864 /* inode for the directory being modified */
2865 /* inode for directory entry being removed */
2866 /* indicates if doing RMDIR */
2867 /* previously referenced inode, if any */
2868 STATIC struct dirrem *
2869 newdirrem(struct buf *bp, struct inode *dp, struct inode *ip, int isrmdir,
2870     struct dirrem **prevdirremp)
2871 {
2872 	int offset;
2873 	daddr_t lbn;
2874 	struct diradd *dap;
2875 	struct dirrem *dirrem;
2876 	struct pagedep *pagedep;
2877 
2878 	/*
2879 	 * Whiteouts have no deletion dependencies.
2880 	 */
2881 	if (ip == NULL)
2882 		panic("newdirrem: whiteout");
2883 	/*
2884 	 * If we are over our limit, try to improve the situation.
2885 	 * Limiting the number of dirrem structures will also limit
2886 	 * the number of freefile and freeblks structures.
2887 	 */
2888 	if (num_dirrem > max_softdeps / 2)
2889 		(void) request_cleanup(FLUSH_REMOVE, 0);
2890 	num_dirrem += 1;
2891 	dirrem = pool_get(&dirrem_pool, PR_WAITOK | PR_ZERO);
2892 	dirrem->dm_list.wk_type = D_DIRREM;
2893 	dirrem->dm_state = isrmdir ? RMDIR : 0;
2894 	dirrem->dm_mnt = ITOV(ip)->v_mount;
2895 	dirrem->dm_oldinum = ip->i_number;
2896 	*prevdirremp = NULL;
2897 
2898 	ACQUIRE_LOCK(&lk);
2899 	lbn = lblkno(dp->i_fs, dp->i_offset);
2900 	offset = blkoff(dp->i_fs, dp->i_offset);
2901 	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
2902 		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
2903 	dirrem->dm_pagedep = pagedep;
2904 	/*
2905 	 * Check for a diradd dependency for the same directory entry.
2906 	 * If present, then both dependencies become obsolete and can
2907 	 * be de-allocated. Check for an entry on both the pd_diraddhd
2908 	 * list and the pd_pendinghd list.
2909 	 */
2910 
2911 	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
2912 		if (dap->da_offset == offset)
2913 			break;
2914 	if (dap == NULL) {
2915 
2916 		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
2917 			if (dap->da_offset == offset)
2918 				break;
2919 		if (dap == NULL)
2920 			return (dirrem);
2921 	}
2922 	/*
2923 	 * Must be ATTACHED at this point.
2924 	 */
2925 	if ((dap->da_state & ATTACHED) == 0) {
2926 		FREE_LOCK(&lk);
2927 		panic("newdirrem: not ATTACHED");
2928 	}
2929 	if (dap->da_newinum != ip->i_number) {
2930 		FREE_LOCK(&lk);
2931 		panic("newdirrem: inum %u should be %u",
2932 		    ip->i_number, dap->da_newinum);
2933 	}
2934 	/*
2935 	 * If we are deleting a changed name that never made it to disk,
2936 	 * then return the dirrem describing the previous inode (which
2937 	 * represents the inode currently referenced from this entry on disk).
2938 	 */
2939 	if ((dap->da_state & DIRCHG) != 0) {
2940 		*prevdirremp = dap->da_previous;
2941 		dap->da_state &= ~DIRCHG;
2942 		dap->da_pagedep = pagedep;
2943 	}
2944 	/*
2945 	 * We are deleting an entry that never made it to disk.
2946 	 * Mark it COMPLETE so we can delete its inode immediately.
2947 	 */
2948 	dirrem->dm_state |= COMPLETE;
2949 	free_diradd(dap);
2950 	return (dirrem);
2951 }
2952
2953 /*
2954 * Directory entry change dependencies.
2955 *
2956 * Changing an existing directory entry requires that an add operation
2957 * be completed first followed by a deletion. The semantics for the addition
2958 * are identical to the description of adding a new entry above except
2959 * that the rollback is to the old inode number rather than zero. Once
2960 * the addition dependency is completed, the removal is done as described
2961 * in the removal routine above.
2962 */
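/*
 * [Editor's sketch, not part of the source] For a changed entry the
 * rollback target is the old inode number rather than zero, i.e.
 * initiate_write_filepage() below performs
 *
 *	ep->d_ino = dap->da_previous->dm_oldinum;
 *
 * for DIRCHG entries, so the on-disk directory always references an
 * inode whose on-disk link count still accounts for it.
 */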
2963
2964 /*
2965  * This routine should be called immediately after changing
2966  * a directory entry. The inode's link count should not be
2967  * decremented by the calling procedure -- the soft updates
2968  * code will perform this task when it is safe.
2969  */
2970 /* buffer containing directory block */
2971 /* inode for the directory being modified */
2972 /* inode for directory entry being removed */
2973 /* new inode number for changed entry */
2974 /* indicates if doing RMDIR */
2975 void
2976 softdep_setup_directory_change(struct buf *bp, struct inode *dp,
2977     struct inode *ip, long newinum, int isrmdir)
2978 {
2979 	int offset;
2980 	struct diradd *dap;
2981 	struct dirrem *dirrem, *prevdirrem;
2982 	struct pagedep *pagedep;
2983 	struct inodedep *inodedep;
2984 
2985 	offset = blkoff(dp->i_fs, dp->i_offset);
2986 	dap = pool_get(&diradd_pool, PR_WAITOK | PR_ZERO);
2987 	dap->da_list.wk_type = D_DIRADD;
2988 	dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
2989 	dap->da_offset = offset;
2990 	dap->da_newinum = newinum;
2991 
2992 	/*
2993 	 * Allocate a new dirrem and ACQUIRE_LOCK.
2994 	 */
2995 	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
2996 	pagedep = dirrem->dm_pagedep;
2997 	/*
2998 	 * The possible values for isrmdir:
2999 	 *	0 - non-directory file rename
3000 	 *	1 - directory rename within same directory
3001 	 *	inum - directory rename to new directory of given inode number
3002 	 * When renaming to a new directory, we are both deleting and
3003 	 * creating a new directory entry, so the link count on the new
3004 	 * directory should not change. Thus we do not need the followup
3005 	 * dirrem which is usually done in handle_workitem_remove. We set
3006 	 * the DIRCHG flag to tell handle_workitem_remove to skip the
3007 	 * followup dirrem.
3008 	 */
3009 	if (isrmdir > 1)
3010 		dirrem->dm_state |= DIRCHG;
3011 
3012 	/*
3013 	 * If the COMPLETE flag is clear, then there were no active
3014 	 * entries and we want to roll back to the previous inode until
3015 	 * the new inode is committed to disk. If the COMPLETE flag is
3016 	 * set, then we have deleted an entry that never made it to disk.
3017 	 * If the entry we deleted resulted from a name change, then the old
3018 	 * inode reference still resides on disk. Any rollback that we do
3019 	 * needs to be to that old inode (returned to us in prevdirrem). If
3020 	 * the entry we deleted resulted from a create, then there is
3021 	 * no entry on the disk, so we want to roll back to zero rather
3022 	 * than the uncommitted inode. In either of the COMPLETE cases we
3023 	 * want to immediately free the unwritten and unreferenced inode.
3024 	 */
3025 	if ((dirrem->dm_state & COMPLETE) == 0) {
3026 		dap->da_previous = dirrem;
3027 	} else {
3028 		if (prevdirrem != NULL) {
3029 			dap->da_previous = prevdirrem;
3030 		} else {
3031 			dap->da_state &= ~DIRCHG;
3032 			dap->da_pagedep = pagedep;
3033 		}
3034 		dirrem->dm_dirinum = pagedep->pd_ino;
3035 		add_to_worklist(&dirrem->dm_list);
3036 	}
3037 	/*
3038 	 * Link into its inodedep. Put it on the id_bufwait list if the inode
3039 	 * is not yet written. If it is written, do the post-inode write
3040 	 * processing to put it on the id_pendinghd list.
3041 	 */
3042 	if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 ||
3043 	    (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
3044 		dap->da_state |= COMPLETE;
3045 		LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
3046 		WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
3047 	} else {
3048 		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
3049 		    dap, da_pdlist);
3050 		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
3051 	}
3052 	FREE_LOCK(&lk);
3053 }
3054
3055 /*
3056  * Called whenever the link count on an inode is changed.
3057  * It creates an inode dependency so that the new reference(s)
3058  * to the inode cannot be committed to disk until the updated
3059  * inode has been written.
3060  */
3061 /* the inode with the increased link count */
3062 /* do background work or not */
3063 void
3064 softdep_change_linkcnt(struct inode *ip, int nodelay)
3065 {
3066 	struct inodedep *inodedep;
3067 	int flags;
3068 
3069 	/*
3070 	 * If requested, do not allow background work to happen.
3071 	 */
3072 	flags = DEPALLOC;
3073 	if (nodelay)
3074 		flags |= NODELAY;
3075 
3076 	ACQUIRE_LOCK(&lk);
3077 
3078 	(void) inodedep_lookup(ip->i_fs, ip->i_number, flags, &inodedep);
3079 	if (DIP(ip, nlink) < ip->i_effnlink) {
3080 		FREE_LOCK(&lk);
3081 		panic("softdep_change_linkcnt: bad delta");
3082 	}
3083 
3084 	inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
3085 
3086 	FREE_LOCK(&lk);
3087 }
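/*
 * [Editor's example, not part of the source] id_nlinkdelta records how
 * far the on-disk link count runs ahead of the effective one. E.g.
 * after unlinking one name of a file with two links, DIP(ip, nlink)
 * is still 2 while ip->i_effnlink is 1, so id_nlinkdelta = 1 until
 * the updated inode block reaches disk.
 */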
3088
3089 /*
3090  * This workitem decrements the inode's link count.
3091  * If the link count reaches zero, the file is removed.
3092  */
3093 STATIC void
3094 handle_workitem_remove(struct dirrem *dirrem)
3095 {
3096 	struct proc *p = CURPROC;	/* XXX */
3097 	struct inodedep *inodedep;
3098 	struct vnode *vp;
3099 	struct inode *ip;
3100 	ufsino_t oldinum;
3101 	int error;
3102 
3103 	if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) {
3104 		softdep_error("handle_workitem_remove: vget", error);
3105 		return;
3106 	}
3107 	ip = VTOI(vp);
3108 	ACQUIRE_LOCK(&lk);
3109 	if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep))
3110 	    == 0) {
3111 		FREE_LOCK(&lk);
3112 		panic("handle_workitem_remove: lost inodedep");
3113 	}
3114 	/*
3115 	 * Normal file deletion.
3116 	 */
3117 	if ((dirrem->dm_state & RMDIR) == 0) {
3118 		DIP_ADD(ip, nlink, -1);
3119 		ip->i_flag |= IN_CHANGE;
3120 		if (DIP(ip, nlink) < ip->i_effnlink) {
3121 			FREE_LOCK(&lk);
3122 			panic("handle_workitem_remove: bad file delta");
3123 		}
3124 		inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
3125 		FREE_LOCK(&lk);
3126 		vput(vp);
3127 		num_dirrem -= 1;
3128 		WORKITEM_FREE(dirrem, D_DIRREM);
3129 		return;
3130 	}
3131 	/*
3132 	 * Directory deletion. Decrement reference count for both the
3133 	 * just deleted parent directory entry and the reference for ".".
3134 	 * Next truncate the directory to length zero. When the
3135 	 * truncation completes, arrange to have the reference count on
3136 	 * the parent decremented to account for the loss of "..".
3137 	 */
3138 	DIP_ADD(ip, nlink, -2);
3139 	ip->i_flag |= IN_CHANGE;
3140 	if (DIP(ip, nlink) < ip->i_effnlink)
3141 		panic("handle_workitem_remove: bad dir delta");
3142 	inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
3143 	FREE_LOCK(&lk);
3144 	if ((error = UFS_TRUNCATE(ip, (off_t)0, 0, p->p_ucred)) != 0)
3145 		softdep_error("handle_workitem_remove: truncate", error);
3146 	/*
3147 	 * Rename a directory to a new parent. Since we are both deleting
3148 	 * and creating a new directory entry, the link count on the new
3149 	 * directory should not change. Thus we skip the followup dirrem.
3150 	 */
3151 	if (dirrem->dm_state & DIRCHG) {
3152 		vput(vp);
3153 		num_dirrem -= 1;
3154 		WORKITEM_FREE(dirrem, D_DIRREM);
3155 		return;
3156 	}
3157 	/*
3158 	 * If the inodedep does not exist, then the zero'ed inode has
3159 	 * been written to disk. If the allocated inode has never been
3160 	 * written to disk, then the on-disk inode is zero'ed. In either
3161 	 * case we can remove the file immediately.
3162 	 */
3163 	ACQUIRE_LOCK(&lk);
3164 	dirrem->dm_state = 0;
3165 	oldinum = dirrem->dm_oldinum;
3166 	dirrem->dm_oldinum = dirrem->dm_dirinum;
3167 	if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 ||
3168 	    check_inode_unwritten(inodedep)) {
3169 		FREE_LOCK(&lk);
3170 		vput(vp);
3171 		handle_workitem_remove(dirrem);
3172 		return;
3173 	}
3174 	WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
3175 	FREE_LOCK(&lk);
3176 	ip->i_flag |= IN_CHANGE;
3177 	UFS_UPDATE(VTOI(vp), 0);
3178 	vput(vp);
3179 }
3180
3181 /*
3182  * Inode de-allocation dependencies.
3183  *
3184  * When an inode's link count is reduced to zero, it can be de-allocated. We
3185  * found it convenient to postpone de-allocation until after the inode is
3186  * written to disk with its new link count (zero). At this point, all of the
3187  * on-disk inode's block pointers are nullified and, with careful dependency
3188  * list ordering, all dependencies related to the inode will be satisfied and
3189  * the corresponding dependency structures de-allocated. So, if/when the
3190  * inode is reused, there will be no mixing of old dependencies with new
3191  * ones. This artificial dependency is set up by the block de-allocation
3192  * procedure above (softdep_setup_freeblocks) and completed by the
3193  * following procedure.
3194  */
3195 STATIC void
3196 handle_workitem_freefile(struct freefile *freefile)
3197 {
3198 	struct fs *fs;
3199 	struct vnode vp;
3200 	struct inode tip;
3201 #ifdef DEBUG
3202 	struct inodedep *idp;
3203 #endif
3204 	int error;
3205 
3206 	fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
3207 #ifdef DEBUG
3208 	ACQUIRE_LOCK(&lk);
3209 	error = inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp);
3210 	FREE_LOCK(&lk);
3211 	if (error)
3212 		panic("handle_workitem_freefile: inodedep survived");
3213 #endif
3214 	tip.i_ump = VFSTOUFS(freefile->fx_mnt);
3215 	tip.i_dev = freefile->fx_devvp->v_rdev;
3216 	tip.i_fs = fs;
3217 	tip.i_vnode = &vp;
3218 	vp.v_data = &tip;
3219 
3220 	if ((error = ffs_freefile(&tip, freefile->fx_oldinum,
3221 	    freefile->fx_mode)) != 0) {
3222 		softdep_error("handle_workitem_freefile", error);
3223 	}
3224 	WORKITEM_FREE(freefile, D_FREEFILE);
3225 }
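/*
 * [Editor's sketch, not part of the source] Putting the pieces
 * together, the deferred de-allocation runs roughly as:
 *
 *	softdep_setup_freeblocks()/softdep_freefile() record the work;
 *	the inode block is written with a zero'ed dinode;
 *	handle_workitem_freeblocks() returns the blocks;
 *	handle_workitem_freefile() (above) returns the inode itself;
 *
 * so ffs_freefile() never runs while any on-disk structure could
 * still reference the inode.
 */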
3226
3227/*
3228 * Disk writes.
3229 *
3230 * The dependency structures constructed above are most actively used when file
3231 * system blocks are written to disk. No constraints are placed on when a
3232 * block can be written, but unsatisfied update dependencies are made safe by
3233 * modifying (or replacing) the source memory for the duration of the disk
3234 * write. When the disk write completes, the memory block is again brought
3235 * up-to-date.
3236 *
3237 * In-core inode structure reclamation.
3238 *
3239 * Because there are a finite number of "in-core" inode structures, they are
3240 * reused regularly. By transferring all inode-related dependencies to the
3241 * in-memory inode block and indexing them separately (via "inodedep"s), we
3242 * can allow "in-core" inode structures to be reused at any time and avoid
3243 * any increase in contention.
3244 *
3245 * Called just before entering the device driver to initiate a new disk I/O.
3246 * The buffer must be locked, thus, no I/O completion operations can occur
3247 * while we are manipulating its associated dependencies.
3248 */
3249/* structure describing disk write to occur */
3250void
3251softdep_disk_io_initiation(struct buf *bp)
3252{
3253 struct worklist *wk, *nextwk;
3254 struct indirdep *indirdep;
3255 struct inodedep *inodedep;
3256 struct buf *sbp;
3257
3258 /*
3259 * We only care about write operations. There should never
3260 * be dependencies for reads.
3261 */
3262 if (bp->b_flags & B_READ0x00008000)
3263 panic("softdep_disk_io_initiation: read");
3264
3265 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
3266
3267 /*
3268 * Do any necessary pre-I/O processing.
3269 */
3270 for (wk = LIST_FIRST(&bp->b_dep)((&bp->b_dep)->lh_first); wk; wk = nextwk) {
3271 nextwk = LIST_NEXT(wk, wk_list)((wk)->wk_list.le_next);
3272 switch (wk->wk_type) {
3273
3274 case D_PAGEDEP0:
3275 initiate_write_filepage(WK_PAGEDEP(wk)((struct pagedep *)(wk)), bp);
3276 continue;
3277
3278 case D_INODEDEP1:
3279 inodedep = WK_INODEDEP(wk)((struct inodedep *)(wk));
3280 if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC0x011954)
3281 initiate_write_inodeblock_ufs1(inodedep, bp);
3282#ifdef FFS21
3283 else
3284 initiate_write_inodeblock_ufs2(inodedep, bp);
3285#endif
3286 continue;
3287
3288 case D_INDIRDEP5:
3289 indirdep = WK_INDIRDEP(wk)((struct indirdep *)(wk));
3290 if (indirdep->ir_stateir_list.wk_state & GOINGAWAY0x0100)
3291 panic("disk_io_initiation: indirdep gone");
3292 /*
3293 * If there are no remaining dependencies, this
3294 * will be writing the real pointers, so the
3295 * dependency can be freed.
3296 */
3297 if (LIST_FIRST(&indirdep->ir_deplisthd)((&indirdep->ir_deplisthd)->lh_first) == NULL((void *)0)) {
3298 sbp = indirdep->ir_savebp;
3299 sbp->b_flags |= B_INVAL0x00000800 | B_NOCACHE0x00001000;
3300 /* inline expand WORKLIST_REMOVE(wk); */
3301 wk->wk_state &= ~ONWORKLIST0x8000;
3302 LIST_REMOVE(wk, wk_list);
3303 WORKITEM_FREE(indirdep, D_INDIRDEP)softdep_freequeue_add((struct worklist *)indirdep);
3304 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3305 brelse(sbp);
3306 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
3307 continue;
3308 }
3309 /*
3310 * Replace up-to-date version with safe version.
3311 */
3312 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3313 indirdep->ir_saveddata = malloc(bp->b_bcount,
3314 M_INDIRDEP83, M_WAITOK0x0001);
3315 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
3316 indirdep->ir_stateir_list.wk_state &= ~ATTACHED0x0001;
3317 indirdep->ir_stateir_list.wk_state |= UNDONE0x0002;
3318 memcpy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
3319 memcpy(bp->b_data, indirdep->ir_savebp->b_data,
3320     bp->b_bcount);
3321 continue;
3322
3323 case D_MKDIR11:
3324 case D_BMSAFEMAP3:
3325 case D_ALLOCDIRECT4:
3326 case D_ALLOCINDIR6:
3327 continue;
3328
3329 default:
3330 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3331 panic("handle_disk_io_initiation: Unexpected type %s",
3332     TYPENAME(wk->wk_type));
3333 /* NOTREACHED */
3334 }
3335 }
3336
3337 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3338}
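
[Editor's note] The indirdep handling above preserves the up-to-date block contents in ir_saveddata and writes the older, dependency-free copy from ir_savebp instead. A minimal userland sketch of that save-and-substitute step; buf, safe, and savedp are illustrative stand-ins for bp->b_data, ir_savebp->b_data, and ir_saveddata:

#include <stdlib.h>
#include <string.h>

int
swap_in_safe_copy(char *buf, const char *safe, char **savedp, size_t len)
{
	*savedp = malloc(len);		/* the kernel uses M_WAITOK and cannot fail */
	if (*savedp == NULL)
		return (-1);
	memcpy(*savedp, buf, len);	/* remember the up-to-date contents */
	memcpy(buf, safe, len);		/* substitute the dependency-free copy */
	return (0);
}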
3339
3340/*
3341 * Called from within the procedure above to deal with unsatisfied
3342 * allocation dependencies in a directory. The buffer must be locked,
3343 * thus, no I/O completion operations can occur while we are
3344 * manipulating its associated dependencies.
3345 */
3346STATIC void
3347initiate_write_filepage(struct pagedep *pagedep, struct buf *bp)
3348{
3349 struct diradd *dap;
3350 struct direct *ep;
3351 int i;
3352
3353 if (pagedep->pd_statepd_list.wk_state & IOSTARTED0x0200) {
3354 /*
3355 * This can only happen if there is a driver that does not
3356 * understand chaining. Here biodone will reissue the call
3357 * to strategy for the incomplete buffers.
3358 */
3359 printf("initiate_write_filepage: already started\n");
3360 return;
3361 }
3362 pagedep->pd_statepd_list.wk_state |= IOSTARTED0x0200;
3363 for (i = 0; i < DAHASHSZ6; i++) {
3364 LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
3365 ep = (struct direct *)
3366 ((char *)bp->b_data + dap->da_offset);
3367 if (ep->d_ino != dap->da_newinum) {
3368 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3369 panic("%s: dir inum %u != new %u",
3370 "initiate_write_filepage",
3371 ep->d_ino, dap->da_newinum);
3372 }
3373 if (dap->da_stateda_list.wk_state & DIRCHG0x0080)
3374 ep->d_ino = dap->da_previousda_un.dau_previous->dm_oldinum;
3375 else
3376 ep->d_ino = 0;
3377 dap->da_stateda_list.wk_state &= ~ATTACHED0x0001;
3378 dap->da_stateda_list.wk_state |= UNDONE0x0002;
3379 }
3380 }
3381}
3382
3383/*
3384 * Called from within the procedure above to deal with unsatisfied
3385 * allocation dependencies in an inodeblock. The buffer must be
3386 * locked, thus, no I/O completion operations can occur while we
3387 * are manipulating its associated dependencies.
3388 */
3389/* The inode block */
3390STATIC void
3391initiate_write_inodeblock_ufs1(struct inodedep *inodedep, struct buf *bp)
3392{
3393 struct allocdirect *adp, *lastadp;
3394 struct ufs1_dinode *dp;
3395 struct fs *fs;
3396#ifdef DIAGNOSTIC1
3397 daddr_t prevlbn = 0;
3398 int32_t d1, d2;
3399#endif
3400 int i, deplist;
3401
3402 if (inodedep->id_stateid_list.wk_state & IOSTARTED0x0200) {
3403 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3404 panic("initiate_write_inodeblock: already started");
3405 }
3406 inodedep->id_stateid_list.wk_state |= IOSTARTED0x0200;
3407 fs = inodedep->id_fs;
3408 dp = (struct ufs1_dinode *)bp->b_data +
3409 ino_to_fsbo(fs, inodedep->id_ino)((inodedep->id_ino) % ((fs)->fs_inopb));
3410 /*
3411 * If the bitmap is not yet written, then the allocated
3412 * inode cannot be written to disk.
3413 */
3414 if ((inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008) == 0) {
3415 if (inodedep->id_savedino1id_un.idu_savedino1 != NULL((void *)0)) {
3416 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3417 panic("initiate_write_inodeblock: already doing I/O");
3418 }
3419 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3420 inodedep->id_savedino1id_un.idu_savedino1 = malloc(sizeof(struct ufs1_dinode),
3421 M_INODEDEP79, M_WAITOK0x0001);
3422 inodedep->id_unsize = sizeof(struct ufs1_dinode);
3423 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
3424 *inodedep->id_savedino1id_un.idu_savedino1 = *dp;
3425 memset(dp, 0, sizeof(struct ufs1_dinode))__builtin_memset((dp), (0), (sizeof(struct ufs1_dinode)));
3426 return;
3427 }
3428 /*
3429 * If no dependencies, then there is nothing to roll back.
3430 */
3431 inodedep->id_savedsize = dp->di_size;
3432 if (TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) == NULL((void *)0))
3433 return;
3434 /*
3435 * Set the dependencies to busy.
3436 */
3437 for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp;
3438 adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3439#ifdef DIAGNOSTIC1
3440 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3441 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3442 panic("softdep_write_inodeblock: lbn order");
3443 }
3444 prevlbn = adp->ad_lbn;
3445 if (adp->ad_lbn < NDADDR12 &&
3446 (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
3447 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3448 panic("%s: direct pointer #%lld mismatch %d != %d",
3449 "softdep_write_inodeblock", (long long)adp->ad_lbn,
3450 d1, d2);
3451 }
3452 if (adp->ad_lbn >= NDADDR12 &&
3453 (d1 = dp->di_ib[adp->ad_lbn - NDADDR12]) !=
3454 (d2 = adp->ad_newblkno)) {
3455 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3456 panic("%s: indirect pointer #%lld mismatch %d != %d",
3457 "softdep_write_inodeblock", (long long)(adp->ad_lbn -
3458 NDADDR12), d1, d2);
3459 }
3460 deplist |= 1 << adp->ad_lbn;
3461 if ((adp->ad_statead_list.wk_state & ATTACHED0x0001) == 0) {
3462 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3463 panic("softdep_write_inodeblock: Unknown state 0x%x",
3464 adp->ad_statead_list.wk_state);
3465 }
3466#endif /* DIAGNOSTIC */
3467 adp->ad_statead_list.wk_state &= ~ATTACHED0x0001;
3468 adp->ad_statead_list.wk_state |= UNDONE0x0002;
3469 }
3470 /*
3471 * The on-disk inode cannot claim to be any larger than the last
3472 * fragment that has been written. Otherwise, the on-disk inode
3473 * might have fragments that were not the last block in the file
3474 * which would corrupt the filesystem.
3475 */
3476 for (lastadp = NULL((void *)0), adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp;
3477 lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3478 if (adp->ad_lbn >= NDADDR12)
3479 break;
3480 dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
3481 /* keep going until hitting a rollback to a frag */
3482 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3483 continue;
3484 dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3485 for (i = adp->ad_lbn + 1; i < NDADDR12; i++) {
3486#ifdef DIAGNOSTIC1
3487 if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
3488 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3489 panic("softdep_write_inodeblock: lost dep1");
3490 }
3491#endif /* DIAGNOSTIC */
3492 dp->di_db[i] = 0;
3493 }
3494 for (i = 0; i < NIADDR3; i++) {
3495#ifdef DIAGNOSTIC1
3496 if (dp->di_ib[i] != 0 &&
3497 (deplist & ((1 << NDADDR12) << i)) == 0) {
3498 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3499 panic("softdep_write_inodeblock: lost dep2");
3500 }
3501#endif /* DIAGNOSTIC */
3502 dp->di_ib[i] = 0;
3503 }
3504 return;
3505 }
3506 /*
3507 * If we have zero'ed out the last allocated block of the file,
3508 * roll back the size to the last currently allocated block.
3509 * We know that this last allocated block is full-sized, as
3510 * we already checked for fragments in the loop above.
3511 */
3512 if (lastadp != NULL((void *)0) &&
3513 dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3514 for (i = lastadp->ad_lbn; i >= 0; i--)
3515 if (dp->di_db[i] != 0)
3516 break;
3517 dp->di_size = (i + 1) * fs->fs_bsize;
3518 }
3519 /*
3520 * The only dependencies are for indirect blocks.
3521 *
3522 * The file size for indirect block additions is not guaranteed.
3523 * Such a guarantee would be non-trivial to achieve. The conventional
3524 * synchronous write implementation also does not make this guarantee.
3525 * Fsck should catch and fix discrepancies. Arguably, the file size
3526 * can be over-estimated without destroying integrity when the file
3527 * moves into the indirect blocks (i.e., is large). If we want to
3528 * postpone fsck, we are stuck with this argument.
3529 */
3530 for (; adp; adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next))
3531 dp->di_ib[adp->ad_lbn - NDADDR12] = 0;
3532}
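
[Editor's note] The DIAGNOSTIC checks above track which lbns carry dependencies in the deplist bitmask: a direct block at lbn n occupies bit n, and indirect block i (whose ad_lbn is NDADDR + i) occupies bit NDADDR + i, hence the (1 << NDADDR) << i test. A standalone sketch with illustrative values:

#include <stdio.h>

#define NDADDR	12	/* direct pointers per dinode, as in the listing */

int
main(void)
{
	int deplist = 0;

	deplist |= 1 << 3;		/* direct block at lbn 3 -> bit 3 */
	deplist |= 1 << (NDADDR + 1);	/* second indirect -> bit 13 */

	/* the test used above for indirect block i == 1 */
	printf("indirect 1 tracked: %d\n",
	    (deplist & ((1 << NDADDR) << 1)) != 0);
	return (0);
}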
3533
3534#ifdef FFS21
3535/*
3536 * Version of initiate_write_inodeblock that handles FFS2 dinodes.
3537 */
3538/* The inode block */
3539STATIC void
3540initiate_write_inodeblock_ufs2(struct inodedep *inodedep, struct buf *bp)
3541{
3542 struct allocdirect *adp, *lastadp;
3543 struct ufs2_dinode *dp;
3544 struct fs *fs = inodedep->id_fs;
3545#ifdef DIAGNOSTIC1
3546 daddr_t prevlbn = -1, d1, d2;
3547#endif
3548 int deplist, i;
3549
3550 if (inodedep->id_stateid_list.wk_state & IOSTARTED0x0200)
3551 panic("initiate_write_inodeblock_ufs2: already started");
3552 inodedep->id_stateid_list.wk_state |= IOSTARTED0x0200;
3553 fs = inodedep->id_fs;
3554 dp = (struct ufs2_dinode *)bp->b_data +
3555 ino_to_fsbo(fs, inodedep->id_ino)((inodedep->id_ino) % ((fs)->fs_inopb));
3556 /*
3557 * If the bitmap is not yet written, then the allocated
3558 * inode cannot be written to disk.
3559 */
3560 if ((inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008) == 0) {
3561 if (inodedep->id_savedino2id_un.idu_savedino2 != NULL((void *)0))
3562 panic("initiate_write_inodeblock_ufs2: I/O underway");
3563 inodedep->id_savedino2id_un.idu_savedino2 = malloc(sizeof(struct ufs2_dinode),
3564 M_INODEDEP79, M_WAITOK0x0001);
3565 inodedep->id_unsize = sizeof(struct ufs2_dinode);
3566 *inodedep->id_savedino2id_un.idu_savedino2 = *dp;
3567 memset(dp, 0, sizeof(struct ufs2_dinode))__builtin_memset((dp), (0), (sizeof(struct ufs2_dinode)));
3568 return;
3569 }
3570 /*
3571 * If no dependencies, then there is nothing to roll back.
3572 */
3573 inodedep->id_savedsize = dp->di_size;
3574 if (TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) == NULL((void *)0))
3575 return;
3576
3577#ifdef notyet
3578 inodedep->id_savedextsize = dp->di_extsize;
3579 if (TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) == NULL((void *)0) &&
3580 TAILQ_FIRST(&inodedep->id_extupdt)((&inodedep->id_extupdt)->tqh_first) == NULL((void *)0))
3581 return;
3582 /*
3583 * Set the ext data dependencies to busy.
3584 */
3585 for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt)((&inodedep->id_extupdt)->tqh_first); adp;
3586 adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3587#ifdef DIAGNOSTIC1
3588 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3589 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3590 panic("softdep_write_inodeblock: lbn order");
3591 }
3592 prevlbn = adp->ad_lbn;
3593 if ((d1 = dp->di_extb[adp->ad_lbn]) !=
3594 (d2 = adp->ad_newblkno)) {
3595 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3596 panic("%s: direct pointer #%lld mismatch %lld != %lld",
3597 "softdep_write_inodeblock", (long long)adp->ad_lbn,
3598 d1, d2);
3599 }
3600 deplist |= 1 << adp->ad_lbn;
3601 if ((adp->ad_statead_list.wk_state & ATTACHED0x0001) == 0) {
3602 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3603 panic("softdep_write_inodeblock: Unknown state 0x%x",
3604 adp->ad_statead_list.wk_state);
3605 }
3606#endif /* DIAGNOSTIC */
3607 adp->ad_statead_list.wk_state &= ~ATTACHED0x0001;
3608 adp->ad_statead_list.wk_state |= UNDONE0x0002;
3609 }
3610 /*
3611 * The on-disk inode cannot claim to be any larger than the last
3612 * fragment that has been written. Otherwise, the on-disk inode
3613 * might have fragments that were not the last block in the ext
3614 * data which would corrupt the filesystem.
3615 */
3616 for (lastadp = NULL((void *)0), adp = TAILQ_FIRST(&inodedep->id_extupdt)((&inodedep->id_extupdt)->tqh_first); adp;
3617 lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3618 dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno;
3619 /* keep going until hitting a rollback to a frag */
3620 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3621 continue;
3622 dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3623 for (i = adp->ad_lbn + 1; i < NXADDR2; i++) {
3624#ifdef DIAGNOSTIC1
3625 if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) {
3626 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3627 panic("softdep_write_inodeblock: lost dep1");
3628 }
3629#endif /* DIAGNOSTIC */
3630 dp->di_extb[i] = 0;
3631 }
3632 lastadp = NULL((void *)0);
3633 break;
3634 }
3635 /*
3636 * If we have zero'ed out the last allocated block of the ext
3637 * data, roll back the size to the last currently allocated block.
3638 * We know that this last allocated block is full-sized, as
3639 * we already checked for fragments in the loop above.
3640 */
3641 if (lastadp != NULL((void *)0) &&
3642 dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3643 for (i = lastadp->ad_lbn; i >= 0; i--)
3644 if (dp->di_extb[i] != 0)
3645 break;
3646 dp->di_extsize = (i + 1) * fs->fs_bsize;
3647 }
3648#endif /* notyet */
3649
3650 /*
3651 * Set the file data dependencies to busy.
3652 */
3653 for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp;
3654 adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3655#ifdef DIAGNOSTIC1
3656 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3657 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3658 panic("softdep_write_inodeblock: lbn order");
3659 }
3660 prevlbn = adp->ad_lbn;
3661 if (adp->ad_lbn < NDADDR12 &&
3662 (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
3663 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3664 panic("%s: direct pointer #%lld mismatch %lld != %lld",
3665 "softdep_write_inodeblock", (long long)adp->ad_lbn,
3666 d1, d2);
3667 }
3668 if (adp->ad_lbn >= NDADDR12 &&
3669 (d1 = dp->di_ib[adp->ad_lbn - NDADDR12]) !=
3670 (d2 = adp->ad_newblkno)) {
3671 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3672 panic("%s: indirect pointer #%lld mismatch %lld != %lld",
3673 "softdep_write_inodeblock", (long long)(adp->ad_lbn -
3674 NDADDR12), d1, d2);
3675 }
3676 deplist |= 1 << adp->ad_lbn;
3677 if ((adp->ad_statead_list.wk_state & ATTACHED0x0001) == 0) {
3678 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3679 panic("softdep_write_inodeblock: Unknown state 0x%x",
3680 adp->ad_statead_list.wk_state);
3681 }
3682#endif /* DIAGNOSTIC */
3683 adp->ad_statead_list.wk_state &= ~ATTACHED0x0001;
3684 adp->ad_statead_list.wk_state |= UNDONE0x0002;
3685 }
3686 /*
3687 * The on-disk inode cannot claim to be any larger than the last
3688 * fragment that has been written. Otherwise, the on-disk inode
3689 * might have fragments that were not the last block in the file
3690 * which would corrupt the filesystem.
3691 */
3692 for (lastadp = NULL((void *)0), adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp;
3693 lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3694 if (adp->ad_lbn >= NDADDR12)
3695 break;
3696 dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
3697 /* keep going until hitting a rollback to a frag */
3698 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3699 continue;
3700 dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3701 for (i = adp->ad_lbn + 1; i < NDADDR12; i++) {
3702#ifdef DIAGNOSTIC1
3703 if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
3704 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3705 panic("softdep_write_inodeblock: lost dep2");
3706 }
3707#endif /* DIAGNOSTIC */
3708 dp->di_db[i] = 0;
3709 }
3710 for (i = 0; i < NIADDR3; i++) {
3711#ifdef DIAGNOSTIC1
3712 if (dp->di_ib[i] != 0 &&
3713 (deplist & ((1 << NDADDR12) << i)) == 0) {
3714 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3715 panic("softdep_write_inodeblock: lost dep3");
3716 }
3717#endif /* DIAGNOSTIC */
3718 dp->di_ib[i] = 0;
3719 }
3720 return;
3721 }
3722 /*
3723 * If we have zero'ed out the last allocated block of the file,
3724 * roll back the size to the last currently allocated block.
3725 * We know that this last allocated block is full-sized, as
3726 * we already checked for fragments in the loop above.
3727 */
3728 if (lastadp != NULL((void *)0) &&
3729 dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3730 for (i = lastadp->ad_lbn; i >= 0; i--)
3731 if (dp->di_db[i] != 0)
3732 break;
3733 dp->di_size = (i + 1) * fs->fs_bsize;
3734 }
3735 /*
3736 * The only dependencies are for indirect blocks.
3737 *
3738 * The file size for indirect block additions is not guaranteed.
3739 * Such a guarantee would be non-trivial to achieve. The conventional
3740 * synchronous write implementation also does not make this guarantee.
3741 * Fsck should catch and fix discrepancies. Arguably, the file size
3742 * can be over-estimated without destroying integrity when the file
3743 * moves into the indirect blocks (i.e., is large). If we want to
3744 * postpone fsck, we are stuck with this argument.
3745 */
3746 for (; adp; adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next))
3747 dp->di_ib[adp->ad_lbn - NDADDR12] = 0;
3748}
3749#endif /* FFS2 */
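
[Editor's note] Both writers roll the on-disk size back with the same arithmetic: full blocks up to the rolled-back lbn, plus the old fragment at that lbn. A worked instance as a standalone program; the block size and offsets here are made up for illustration:

#include <stdio.h>

int
main(void)
{
	long fs_bsize = 16384;	/* assumed filesystem block size */
	long ad_lbn = 3;	/* block whose old fragment is restored */
	long ad_oldsize = 2048;	/* size of that old fragment */

	/* full blocks 0..ad_lbn-1, plus the old fragment at ad_lbn */
	long di_size = fs_bsize * ad_lbn + ad_oldsize;

	printf("rolled-back di_size = %ld\n", di_size);	/* 51200 */
	return (0);
}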
3750
3751/*
3752 * This routine is called during the completion interrupt
3753 * service routine for a disk write (from the procedure called
3754 * by the device driver to inform the file system caches of
3755 * a request completion). It should be called early in this
3756 * procedure, before the block is made available to other
3757 * processes or other routines are called.
3758 */
3759/* describes the completed disk write */
3760void
3761softdep_disk_write_complete(struct buf *bp)
3762{
3763 struct worklist *wk;
3764 struct workhead reattach;
3765 struct newblk *newblk;
3766 struct allocindir *aip;
3767 struct allocdirect *adp;
3768 struct indirdep *indirdep;
3769 struct inodedep *inodedep;
3770 struct bmsafemap *bmsafemap;
3771
3772 /*
3773 * If an error occurred while doing the write, then the data
3774 * has not hit the disk and the dependencies cannot be unrolled.
3775 */
3776 if ((bp->b_flags & B_ERROR0x00000400) && !(bp->b_flags & B_INVAL0x00000800))
3777 return;
3778
3779#ifdef DEBUG
3780 if (lk.lkt_held != -1)
3781 panic("softdep_disk_write_complete: lock is held");
3782 lk.lkt_held = -2;
3783#endif
3784 LIST_INIT(&reattach);
3785 while ((wk = LIST_FIRST(&bp->b_dep)((&bp->b_dep)->lh_first)) != NULL((void *)0)) {
3786 WORKLIST_REMOVE(wk);
3787 switch (wk->wk_type) {
3788
3789 case D_PAGEDEP0:
3790 if (handle_written_filepage(WK_PAGEDEP(wk)((struct pagedep *)(wk)), bp))
3791 WORKLIST_INSERT(&reattach, wk);
3792 continue;
3793
3794 case D_INODEDEP1:
3795 if (handle_written_inodeblock(WK_INODEDEP(wk)((struct inodedep *)(wk)), bp))
3796 WORKLIST_INSERT(&reattach, wk);
3797 continue;
3798
3799 case D_BMSAFEMAP3:
3800 bmsafemap = WK_BMSAFEMAP(wk)((struct bmsafemap *)(wk));
3801 while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd)((&bmsafemap->sm_newblkhd)->lh_first))) {
3802 newblk->nb_state |= DEPCOMPLETE0x0008;
3803 newblk->nb_bmsafemap = NULL((void *)0);
3804 LIST_REMOVE(newblk, nb_deps);
3805 }
3806 while ((adp =
3807 LIST_FIRST(&bmsafemap->sm_allocdirecthd)((&bmsafemap->sm_allocdirecthd)->lh_first))) {
3808 adp->ad_statead_list.wk_state |= DEPCOMPLETE0x0008;
3809 adp->ad_buf = NULL((void *)0);
3810 LIST_REMOVE(adp, ad_deps);
3811 handle_allocdirect_partdone(adp);
3812 }
3813 while ((aip =
3814 LIST_FIRST(&bmsafemap->sm_allocindirhd)((&bmsafemap->sm_allocindirhd)->lh_first))) {
3815 aip->ai_stateai_list.wk_state |= DEPCOMPLETE0x0008;
3816 aip->ai_buf = NULL((void *)0);
3817 LIST_REMOVE(aip, ai_deps);
3818 handle_allocindir_partdone(aip);
3819 }
3820 while ((inodedep =
3821 LIST_FIRST(&bmsafemap->sm_inodedephd)((&bmsafemap->sm_inodedephd)->lh_first)) != NULL((void *)0)) {
3822 inodedep->id_stateid_list.wk_state |= DEPCOMPLETE0x0008;
3823 LIST_REMOVE(inodedep, id_deps);
3824 inodedep->id_buf = NULL((void *)0);
3825 }
3826 WORKITEM_FREE(bmsafemap, D_BMSAFEMAP)softdep_freequeue_add((struct worklist *)bmsafemap);
3827 continue;
3828
3829 case D_MKDIR11:
3830 handle_written_mkdir(WK_MKDIR(wk)((struct mkdir *)(wk)), MKDIR_BODY0x0020);
3831 continue;
3832
3833 case D_ALLOCDIRECT4:
3834 adp = WK_ALLOCDIRECT(wk)((struct allocdirect *)(wk));
3835 adp->ad_statead_list.wk_state |= COMPLETE0x0004;
3836 handle_allocdirect_partdone(adp);
3837 continue;
3838
3839 case D_ALLOCINDIR6:
3840 aip = WK_ALLOCINDIR(wk)((struct allocindir *)(wk));
3841 aip->ai_stateai_list.wk_state |= COMPLETE0x0004;
3842 handle_allocindir_partdone(aip);
3843 continue;
3844
3845 case D_INDIRDEP5:
3846 indirdep = WK_INDIRDEP(wk)((struct indirdep *)(wk));
3847 if (indirdep->ir_stateir_list.wk_state & GOINGAWAY0x0100)
3848 panic("disk_write_complete: indirdep gone");
3849 memcpy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
3850 free(indirdep->ir_saveddata, M_INDIRDEP83, bp->b_bcount);
3851 indirdep->ir_saveddata = NULL((void *)0);
3852 indirdep->ir_stateir_list.wk_state &= ~UNDONE0x0002;
3853 indirdep->ir_stateir_list.wk_state |= ATTACHED0x0001;
3854 while ((aip = LIST_FIRST(&indirdep->ir_donehd)((&indirdep->ir_donehd)->lh_first))) {
3855 handle_allocindir_partdone(aip);
3856 if (aip == LIST_FIRST(&indirdep->ir_donehd)((&indirdep->ir_donehd)->lh_first))
3857 panic("disk_write_complete: not gone");
3858 }
3859 WORKLIST_INSERT(&reattach, wk);
3860 if ((bp->b_flags & B_DELWRI0x00000080) == 0)
3861 stat_indir_blk_ptrs++;
3862 buf_dirty(bp);
3863 continue;
3864
3865 default:
3866 panic("handle_disk_write_complete: Unknown type %s",
3867     TYPENAME(wk->wk_type));
3868 /* NOTREACHED */
3869 }
3870 }
3871 /*
3872 * Reattach any requests that must be redone.
3873 */
3874 while ((wk = LIST_FIRST(&reattach)((&reattach)->lh_first)) != NULL((void *)0)) {
3875 WORKLIST_REMOVE(wk);
3876 WORKLIST_INSERT(&bp->b_dep, wk);
3877 }
3878#ifdef DEBUG
3879 if (lk.lkt_held != -2)
3880 panic("softdep_disk_write_complete: lock lost");
3881 lk.lkt_held = -1;
3882#endif
3883}
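
[Editor's note] The routine above drains b_dep completely and rebuilds it from the reattach list, so each handler can decide per item whether the dependency must ride along on a future write of the same buffer. A self-contained sketch of that pattern using <sys/queue.h>; struct item and its redo field are hypothetical stand-ins for a worklist entry and a handler's return value:

#include <sys/queue.h>
#include <stddef.h>

struct item {
	LIST_ENTRY(item) entries;
	int redo;
};
LIST_HEAD(itemhead, item);

void
drain_and_reattach(struct itemhead *deps)
{
	struct itemhead reattach;
	struct item *it;

	LIST_INIT(&reattach);
	/* detach every dependency; collect the ones to be redone */
	while ((it = LIST_FIRST(deps)) != NULL) {
		LIST_REMOVE(it, entries);
		if (it->redo)
			LIST_INSERT_HEAD(&reattach, it, entries);
	}
	/* put the survivors back on the buffer's list */
	while ((it = LIST_FIRST(&reattach)) != NULL) {
		LIST_REMOVE(it, entries);
		LIST_INSERT_HEAD(deps, it, entries);
	}
}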
3884
3885/*
3886 * Called from within softdep_disk_write_complete above. Note that
3887 * this routine is always called from interrupt level with further
3888 * splbio interrupts blocked.
3889 */
3890/* the completed allocdirect */
3891STATIC void
3892handle_allocdirect_partdone(struct allocdirect *adp)
3893{
3894 struct allocdirect *listadp;
3895 struct inodedep *inodedep;
3896 long bsize, delay;
3897
3898 splassert(IPL_BIO);
3899
3900 if ((adp->ad_statead_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) != ALLCOMPLETE(0x0001 | 0x0004 | 0x0008))
3901 return;
3902 if (adp->ad_buf != NULL((void *)0))
3903 panic("handle_allocdirect_partdone: dangling dep");
3904
3905 /*
3906 * The on-disk inode cannot claim to be any larger than the last
3907 * fragment that has been written. Otherwise, the on-disk inode
3908 * might have fragments that were not the last block in the file
3909 * which would corrupt the filesystem. Thus, we cannot free any
3910 * allocdirects after one whose ad_oldblkno claims a fragment as
3911 * these blocks must be rolled back to zero before writing the inode.
3912 * We check the currently active set of allocdirects in id_inoupdt.
3913 */
3914 inodedep = adp->ad_inodedep;
3915 bsize = inodedep->id_fs->fs_bsize;
3916 TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
3917 /* found our block */
3918 if (listadp == adp)
3919 break;
3920 /* continue if ad_oldlbn is not a fragment */
3921 if (listadp->ad_oldsize == 0 ||
3922 listadp->ad_oldsize == bsize)
3923 continue;
3924 /* hit a fragment */
3925 return;
3926 }
3927 /*
3928 * If we have reached the end of the current list without
3929 * finding the just finished dependency, then it must be
3930 * on the future dependency list. Future dependencies cannot
3931 * be freed until they are moved to the current list.
3932 */
3933 if (listadp == NULL((void *)0)) {
3934#ifdef DEBUG
3935 TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
3936 /* found our block */
3937 if (listadp == adp)
3938 break;
3939 if (listadp == NULL((void *)0))
3940 panic("handle_allocdirect_partdone: lost dep");
3941#endif /* DEBUG */
3942 return;
3943 }
3944 /*
3945 * If we have found the just finished dependency, then free
3946 * it along with anything that follows it that is complete.
3947 * If the inode still has a bitmap dependency, then it has
3948 * never been written to disk, hence the on-disk inode cannot
3949 * reference the old fragment so we can free it without delay.
3950 */
3951 delay = (inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008);
3952 for (; adp; adp = listadp) {
3953 listadp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next);
3954 if ((adp->ad_statead_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) != ALLCOMPLETE(0x0001 | 0x0004 | 0x0008))
3955 return;
3956 free_allocdirect(&inodedep->id_inoupdt, adp, delay);
3957 }
3958}
3959
3960/*
3961 * Called from within softdep_disk_write_complete above. Note that
3962 * this routine is always called from interrupt level with further
3963 * splbio interrupts blocked.
3964 */
3965/* the completed allocindir */
3966STATIC void
3967handle_allocindir_partdone(struct allocindir *aip)
3968{
3969 struct indirdep *indirdep;
3970
3971 splassert(IPL_BIO);
3972
3973 if ((aip->ai_stateai_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) != ALLCOMPLETE(0x0001 | 0x0004 | 0x0008))
3974 return;
3975 if (aip->ai_buf != NULL((void *)0))
3976 panic("handle_allocindir_partdone: dangling dependency");
3977 indirdep = aip->ai_indirdep;
3978 if (indirdep->ir_stateir_list.wk_state & UNDONE0x0002) {
3979 LIST_REMOVE(aip, ai_next);
3980 LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
3981 return;
3982 }
3983 if (indirdep->ir_stateir_list.wk_state & UFS1FMT0x2000)
3984 ((int32_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
3985 aip->ai_newblkno;
3986 else
3987 ((int64_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
3988 aip->ai_newblkno;
3989 LIST_REMOVE(aip, ai_next);
3990 if (aip->ai_freefrag != NULL((void *)0))
3991 add_to_worklist(&aip->ai_freefrag->ff_list);
3992 WORKITEM_FREE(aip, D_ALLOCINDIR)softdep_freequeue_add((struct worklist *)aip);
3993}
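
[Editor's note] The committed pointer is stored at the width the on-disk format dictates, 32-bit for UFS1 indirect blocks and 64-bit for UFS2, which is what the UFS1FMT test above selects. A reduced sketch of that store:

#include <stdint.h>

void
commit_indir_ptr(void *blk, int offset, int64_t newblkno, int ufs1fmt)
{
	if (ufs1fmt)
		((int32_t *)blk)[offset] = (int32_t)newblkno;
	else
		((int64_t *)blk)[offset] = newblkno;
}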
3994
3995/*
3996 * Called from within softdep_disk_write_complete above to restore
3997 * in-memory inode block contents to their most up-to-date state. Note
3998 * that this routine is always called from interrupt level with further
3999 * splbio interrupts blocked.
4000 */
4001/* buffer containing the inode block */
4002STATIC int
4003handle_written_inodeblock(struct inodedep *inodedep, struct buf *bp)
4004{
4005 struct worklist *wk, *filefree;
4006 struct allocdirect *adp, *nextadp;
4007 struct ufs1_dinode *dp1 = NULL((void *)0);
4008 struct ufs2_dinode *dp2 = NULL((void *)0);
4009 int hadchanges, fstype;
4010
4011 splassert(IPL_BIO);
4012
4013 if ((inodedep->id_stateid_list.wk_state & IOSTARTED0x0200) == 0)
4014 panic("handle_written_inodeblock: not started");
4015 inodedep->id_stateid_list.wk_state &= ~IOSTARTED0x0200;
4016
4017 if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC0x011954) {
4018 fstype = UM_UFS11;
4019 dp1 = (struct ufs1_dinode *) bp->b_data +
4020 ino_to_fsbo(inodedep->id_fs, inodedep->id_ino)((inodedep->id_ino) % ((inodedep->id_fs)->fs_inopb));
4021 } else {
4022 fstype = UM_UFS22;
4023 dp2 = (struct ufs2_dinode *) bp->b_data +
4024 ino_to_fsbo(inodedep->id_fs, inodedep->id_ino)((inodedep->id_ino) % ((inodedep->id_fs)->fs_inopb));
4025 }
4026
4027 /*
4028 * If we had to rollback the inode allocation because of
4029 * bitmaps being incomplete, then simply restore it.
4030 * Keep the block dirty so that it will not be reclaimed until
4031 * all associated dependencies have been cleared and the
4032 * corresponding updates written to disk.
4033 */
4034 if (inodedep->id_savedino1id_un.idu_savedino1 != NULL((void *)0)) {
4035 if (fstype == UM_UFS11)
4036 *dp1 = *inodedep->id_savedino1id_un.idu_savedino1;
4037 else
4038 *dp2 = *inodedep->id_savedino2id_un.idu_savedino2;
4039 free(inodedep->id_savedino1id_un.idu_savedino1, M_INODEDEP79, inodedep->id_unsize);
4040 inodedep->id_savedino1id_un.idu_savedino1 = NULL((void *)0);
4041 if ((bp->b_flags & B_DELWRI0x00000080) == 0)
4042 stat_inode_bitmap++;
4043 buf_dirty(bp);
4044 return (1);
4045 }
4046 inodedep->id_stateid_list.wk_state |= COMPLETE0x0004;
4047 /*
4048 * Roll forward anything that had to be rolled back before
4049 * the inode could be updated.
4050 */
4051 hadchanges = 0;
4052 for (adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp; adp = nextadp) {
4053 nextadp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next);
4054 if (adp->ad_statead_list.wk_state & ATTACHED0x0001)
4055 panic("handle_written_inodeblock: new entry");
4056 if (fstype == UM_UFS11) {
4057 if (adp->ad_lbn < NDADDR12) {
4058 if (dp1->di_db[adp->ad_lbn] != adp->ad_oldblkno)
4059 panic("%s: %s #%lld mismatch %d != "
4060 "%lld",
4061 "handle_written_inodeblock",
4062 "direct pointer",
4063 (long long)adp->ad_lbn,
4064 dp1->di_db[adp->ad_lbn],
4065 (long long)adp->ad_oldblkno);
4066 dp1->di_db[adp->ad_lbn] = adp->ad_newblkno;
4067 } else {
4068 if (dp1->di_ib[adp->ad_lbn - NDADDR12] != 0)
4069 panic("%s: %s #%lld allocated as %d",
4070 "handle_written_inodeblock",
4071 "indirect pointer",
4072 (long long)(adp->ad_lbn - NDADDR12),
4073 dp1->di_ib[adp->ad_lbn - NDADDR12]);
4074 dp1->di_ib[adp->ad_lbn - NDADDR12] =
4075 adp->ad_newblkno;
4076 }
4077 } else {
4078 if (adp->ad_lbn < NDADDR12) {
4079 if (dp2->di_db[adp->ad_lbn] != adp->ad_oldblkno)
4080 panic("%s: %s #%lld mismatch %lld != "
4081 "%lld", "handle_written_inodeblock",
4082 "direct pointer",
4083 (long long)adp->ad_lbn,
4084 dp2->di_db[adp->ad_lbn],
4085 (long long)adp->ad_oldblkno);
4086 dp2->di_db[adp->ad_lbn] = adp->ad_newblkno;
4087 } else {
4088 if (dp2->di_ib[adp->ad_lbn - NDADDR12] != 0)
4089 panic("%s: %s #%lld allocated as %lld",
4090 "handle_written_inodeblock",
4091 "indirect pointer",
4092 (long long)(adp->ad_lbn - NDADDR12),
4093 dp2->di_ib[adp->ad_lbn - NDADDR12]);
4094 dp2->di_ib[adp->ad_lbn - NDADDR12] =
4095 adp->ad_newblkno;
4096 }
4097 }
4098 adp->ad_statead_list.wk_state &= ~UNDONE0x0002;
4099 adp->ad_statead_list.wk_state |= ATTACHED0x0001;
4100 hadchanges = 1;
4101 }
4102 if (hadchanges && (bp->b_flags & B_DELWRI0x00000080) == 0)
4103 stat_direct_blk_ptrs++;
4104 /*
4105 * Reset the file size to its most up-to-date value.
4106 */
4107 if (inodedep->id_savedsize == -1)
4108 panic("handle_written_inodeblock: bad size");
4109
4110 if (fstype == UM_UFS11) {
4111 if (dp1->di_size != inodedep->id_savedsize) {
4112 dp1->di_size = inodedep->id_savedsize;
4113 hadchanges = 1;
4114 }
4115 } else {
4116 if (dp2->di_size != inodedep->id_savedsize) {
4117 dp2->di_size = inodedep->id_savedsize;
4118 hadchanges = 1;
4119 }
4120 }
4121 inodedep->id_savedsize = -1;
4122 /*
4123 * If there were any rollbacks in the inode block, then it must be
4124 * marked dirty so that it will eventually get written back in
4125 * its correct form.
4126 */
4127 if (hadchanges)
4128 buf_dirty(bp);
4129 /*
4130 * Process any allocdirects that completed during the update.
4131 */
4132 if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first)) != NULL((void *)0))
4133 handle_allocdirect_partdone(adp);
4134 /*
4135 * Process deallocations that were held pending until the
4136 * inode had been written to disk. Freeing of the inode
4137 * is delayed until after all blocks have been freed to
4138 * avoid creation of new <vfsid, inum, lbn> triples
4139 * before the old ones have been deleted.
4140 */
4141 filefree = NULL((void *)0);
4142 while ((wk = LIST_FIRST(&inodedep->id_bufwait)((&inodedep->id_bufwait)->lh_first)) != NULL((void *)0)) {
4143 WORKLIST_REMOVE(wk);
4144 switch (wk->wk_type) {
4145
4146 case D_FREEFILE9:
4147 /*
4148 * We defer adding filefree to the worklist until
4149 * all other additions have been made to ensure
4150 * that it will be done after all the old blocks
4151 * have been freed.
4152 */
4153 if (filefree != NULL((void *)0))
4154 panic("handle_written_inodeblock: filefree");
4155 filefree = wk;
4156 continue;
4157
4158 case D_MKDIR11:
4159 handle_written_mkdir(WK_MKDIR(wk)((struct mkdir *)(wk)), MKDIR_PARENT0x0010);
4160 continue;
4161
4162 case D_DIRADD10:
4163 diradd_inode_written(WK_DIRADD(wk)((struct diradd *)(wk)), inodedep);
4164 continue;
4165
4166 case D_FREEBLKS8:
4167 wk->wk_state |= COMPLETE0x0004;
4168 if ((wk->wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) != ALLCOMPLETE(0x0001 | 0x0004 | 0x0008))
4169 continue;
4170 /* FALLTHROUGH */
4171 case D_FREEFRAG7:
4172 case D_DIRREM12:
4173 add_to_worklist(wk);
4174 continue;
4175
4176 case D_NEWDIRBLK13:
4177 free_newdirblk(WK_NEWDIRBLK(wk)((struct newdirblk *)(wk)));
4178 continue;
4179
4180 default:
4181 panic("handle_written_inodeblock: Unknown type %s",
4182     TYPENAME(wk->wk_type));
4183 /* NOTREACHED */
4184 }
4185 }
4186 if (filefree != NULL((void *)0)) {
4187 if (free_inodedep(inodedep) == 0)
4188 panic("handle_written_inodeblock: live inodedep");
4189 add_to_worklist(filefree);
4190 return (0);
4191 }
4192
4193 /*
4194 * If no outstanding dependencies, free it.
4195 */
4196 if (free_inodedep(inodedep) ||
4197 TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) == NULL((void *)0))
4198 return (0);
4199 return (hadchanges);
4200}
4201
4202/*
4203 * Process a diradd entry after its dependent inode has been written.
4204 * This routine must be called with splbio interrupts blocked.
4205 */
4206STATIC void
4207diradd_inode_written(struct diradd *dap, struct inodedep *inodedep)
4208{
4209 struct pagedep *pagedep;
4210
4211 splassert(IPL_BIO);
4212
4213 dap->da_stateda_list.wk_state |= COMPLETE0x0004;
4214 if ((dap->da_stateda_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) == ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) {
4215 if (dap->da_stateda_list.wk_state & DIRCHG0x0080)
4216 pagedep = dap->da_previousda_un.dau_previous->dm_pagedepdm_un.dmu_pagedep;
4217 else
4218 pagedep = dap->da_pagedepda_un.dau_pagedep;
4219 LIST_REMOVE(dap, da_pdlist);
4220 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
4221 }
4222 WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
4223}
4224
4225/*
4226 * Handle the completion of a mkdir dependency.
4227 */
4228STATIC void
4229handle_written_mkdir(struct mkdir *mkdir, int type)
4230{
4231 struct diradd *dap;
4232 struct pagedep *pagedep;
4233
4234 splassert(IPL_BIO);
4235
4236 if (mkdir->md_statemd_list.wk_state != type)
4237 panic("handle_written_mkdir: bad type");
4238 dap = mkdir->md_diradd;
4239 dap->da_stateda_list.wk_state &= ~type;
4240 if ((dap->da_stateda_list.wk_state & (MKDIR_PARENT0x0010 | MKDIR_BODY0x0020)) == 0)
4241 dap->da_stateda_list.wk_state |= DEPCOMPLETE0x0008;
4242 if ((dap->da_stateda_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) == ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) {
4243 if (dap->da_stateda_list.wk_state & DIRCHG0x0080)
4244 pagedep = dap->da_previousda_un.dau_previous->dm_pagedepdm_un.dmu_pagedep;
4245 else
4246 pagedep = dap->da_pagedepda_un.dau_pagedep;
4247 LIST_REMOVE(dap, da_pdlist);
4248 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
4249 }
4250 LIST_REMOVE(mkdir, md_mkdirs);
4251 WORKITEM_FREE(mkdir, D_MKDIR)softdep_freequeue_add((struct worklist *)mkdir);
4252}
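
[Editor's note] A sketch of the state transition above, using the flag values shown in this listing: a new directory's diradd waits on both its own first block (MKDIR_BODY) and its parent's inode (MKDIR_PARENT), and only when both writes have completed does it become DEPCOMPLETE:

#define MKDIR_PARENT	0x0010
#define MKDIR_BODY	0x0020
#define DEPCOMPLETE	0x0008

unsigned int
mkdir_written(unsigned int da_state, unsigned int type)
{
	da_state &= ~type;	/* this half of the dependency is done */
	if ((da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0)
		da_state |= DEPCOMPLETE;
	return (da_state);
}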
4253
4254/*
4255 * Called from within softdep_disk_write_complete above.
4256 * A write operation was just completed. Removed inodes can
4257 * now be freed and associated block pointers may be committed.
4258 * Note that this routine is always called from interrupt level
4259 * with further splbio interrupts blocked.
4260 */
4261/* buffer containing the written page */
4262STATIC int
4263handle_written_filepage(struct pagedep *pagedep, struct buf *bp)
4264{
4265 struct dirrem *dirrem;
4266 struct diradd *dap, *nextdap;
4267 struct direct *ep;
4268 int i, chgs;
4269
4270 splassert(IPL_BIO);
4271
4272 if ((pagedep->pd_statepd_list.wk_state & IOSTARTED0x0200) == 0)
4273 panic("handle_written_filepage: not started");
4274 pagedep->pd_statepd_list.wk_state &= ~IOSTARTED0x0200;
4275 /*
4276 * Process any directory removals that have been committed.
4277 */
4278 while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)((&pagedep->pd_dirremhd)->lh_first)) != NULL((void *)0)) {
4279 LIST_REMOVE(dirrem, dm_next);
4280 dirrem->dm_dirinumdm_un.dmu_dirinum = pagedep->pd_ino;
4281 add_to_worklist(&dirrem->dm_list);
4282 }
4283 /*
4284 * Free any directory additions that have been committed.
4285 * If it is a newly allocated block, we have to wait until
4286 * the on-disk directory inode claims the new block.
4287 */
4288 if ((pagedep->pd_statepd_list.wk_state & NEWBLOCK0x0800) == 0)
4289 while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)((&pagedep->pd_pendinghd)->lh_first)) != NULL((void *)0))
4290 free_diradd(dap);
4291 /*
4292 * Uncommitted directory entries must be restored.
4293 */
4294 for (chgs = 0, i = 0; i < DAHASHSZ6; i++) {
4295 for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i])((&pagedep->pd_diraddhd[i])->lh_first); dap;
4296 dap = nextdap) {
4297 nextdap = LIST_NEXT(dap, da_pdlist)((dap)->da_pdlist.le_next);
4298 if (dap->da_stateda_list.wk_state & ATTACHED0x0001)
4299 panic("handle_written_filepage: attached");
4300 ep = (struct direct *)
4301 ((char *)bp->b_data + dap->da_offset);
4302 ep->d_ino = dap->da_newinum;
4303 dap->da_stateda_list.wk_state &= ~UNDONE0x0002;
4304 dap->da_stateda_list.wk_state |= ATTACHED0x0001;
4305 chgs = 1;
4306 /*
4307 * If the inode referenced by the directory has
4308 * been written out, then the dependency can be
4309 * moved to the pending list.
4310 */
4311 if ((dap->da_stateda_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) == ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) {
4312 LIST_REMOVE(dap, da_pdlist);
4313 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
4314     da_pdlist);
4315 }
4316 }
4317 }
4318 /*
4319 * If there were any rollbacks in the directory, then it must be
4320 * marked dirty so that it will eventually get written back in
4321 * its correct form.
4322 */
4323 if (chgs) {
4324 if ((bp->b_flags & B_DELWRI0x00000080) == 0)
4325 stat_dir_entry++;
4326 buf_dirty(bp);
4327 return (1);
4328 }
4329 /*
4330 * If we are not waiting for a new directory block to be
4331 * claimed by its inode, then the pagedep will be freed.
4332 * Otherwise it will remain to track any new entries on
4333 * the page in case they are fsync'ed.
4334 */
4335 if ((pagedep->pd_statepd_list.wk_state & NEWBLOCK0x0800) == 0) {
4336 LIST_REMOVE(pagedep, pd_hash);
4337 WORKITEM_FREE(pagedep, D_PAGEDEP)softdep_freequeue_add((struct worklist *)pagedep);
4338 }
4339 return (0);
4340}
4341
4342/*
4343 * Writing back in-core inode structures.
4344 *
4345 * The file system only accesses an inode's contents when it occupies an
4346 * "in-core" inode structure. These "in-core" structures are separate from
4347 * the page frames used to cache inode blocks. Only the latter are
4348 * transferred to/from the disk. So, when the updated contents of the
4349 * "in-core" inode structure are copied to the corresponding in-memory inode
4350 * block, the dependencies are also transferred. The following procedure is
4351 * called when copying a dirty "in-core" inode to a cached inode block.
4352 */
4353
4354/*
4355 * Called when an inode is loaded from disk. If the effective link count
4356 * differed from the actual link count when it was last flushed, then we
4357 * need to ensure that the correct effective link count is put back.
4358 */
4359/* the "in_core" copy of the inode */
4360void
4361softdep_load_inodeblock(struct inode *ip)
4362{
4363 struct inodedep *inodedep;
4364
4365 /*
4366 * Check for alternate nlink count.
4367 */
4368 ip->i_effnlink = DIP(ip, nlink);
4369 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
4370 if (inodedep_lookup(ip->i_fsinode_u.fs, ip->i_number, 0, &inodedep) == 0) {
4371 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4372 return;
4373 }
4374 ip->i_effnlink -= inodedep->id_nlinkdelta;
4375 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4376}
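
[Editor's note] The adjustment above recovers the effective link count: the on-disk count minus the removals committed in memory but not yet written. For example, a file with two links on disk and one pending unlink loads with an effective count of one. A trivial sketch, names illustrative:

int
effective_nlink(int ondisk_nlink, int nlinkdelta)
{
	return (ondisk_nlink - nlinkdelta);
}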
4377
4378/*
4379 * This routine is called just before the "in-core" inode
4380 * information is to be copied to the in-memory inode block.
4381 * Recall that an inode block contains several inodes. If
4382 * the force flag is set, then the dependencies will be
4383 * cleared so that the update can always be made. Note that
4384 * the buffer is locked when this routine is called, so we
4385 * will never be in the middle of writing the inode block
4386 * to disk.
4387 */
4388/* the "in_core" copy of the inode */
4389/* the buffer containing the inode block */
4390/* nonzero => update must be allowed */
4391void
4392softdep_update_inodeblock(struct inode *ip, struct buf *bp, int waitfor)
4393{
4394 struct inodedep *inodedep;
4395 struct worklist *wk;
4396 int error, gotit;
4397
4398 /*
4399 * If the effective link count is not equal to the actual link
4400 * count, then we must track the difference in an inodedep while
4401 * the inode is (potentially) tossed out of the cache. Otherwise,
4402 * if there is no existing inodedep, then there are no dependencies
4403 * to track.
4404 */
4405 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
4406 if (inodedep_lookup(ip->i_fsinode_u.fs, ip->i_number, 0, &inodedep) == 0) {
4407 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4408 if (ip->i_effnlink != DIP(ip, nlink))
4409 panic("softdep_update_inodeblock: bad link count");
4410 return;
4411 }
4412 if (inodedep->id_nlinkdelta != DIP(ip, nlink) - ip->i_effnlink) {
4413 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4414 panic("softdep_update_inodeblock: bad delta");
4415 }
4416 /*
4417 * Changes have been initiated. Anything depending on these
4418 * changes cannot occur until this inode has been written.
4419 */
4420 inodedep->id_stateid_list.wk_state &= ~COMPLETE0x0004;
4421 if ((inodedep->id_stateid_list.wk_state & ONWORKLIST0x8000) == 0)
4422 WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
4423 /*
4424 * Any new dependencies associated with the incore inode must
4425 * now be moved to the list associated with the buffer holding
4426 * the in-memory copy of the inode. Once merged process any
4427 * allocdirects that are completed by the merger.
4428 */
4429 merge_inode_lists(inodedep);
4430 if (TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) != NULL((void *)0))
4431 handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first));
4432 /*
4433 * Now that the inode has been pushed into the buffer, the
4434 * operations dependent on the inode being written to disk
4435 * can be moved to the id_bufwait so that they will be
4436 * processed when the buffer I/O completes.
4437 */
4438 while ((wk = LIST_FIRST(&inodedep->id_inowait)((&inodedep->id_inowait)->lh_first)) != NULL((void *)0)) {
4439 WORKLIST_REMOVE(wk);
4440 WORKLIST_INSERT(&inodedep->id_bufwait, wk);
4441 }
4442 /*
4443 * Newly allocated inodes cannot be written until the bitmap
4444 * that allocates them has been written (indicated by
4445 * DEPCOMPLETE being set in id_state). If we are doing a
4446 * forced sync (e.g., an fsync on a file), we force the bitmap
4447 * to be written so that the update can be done.
4448 */
4449 do {
4450 if ((inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008) != 0 || waitfor == 0) {
4451 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4452 return;
4453 }
4454 bp = inodedep->id_buf;
4455 gotit = getdirtybuf(bp, MNT_WAIT1);
4456 } while (gotit == -1);
4457 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4458 if (gotit && (error = bwrite(bp)) != 0)
4459 softdep_error("softdep_update_inodeblock: bwrite", error);
4460 if ((inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008) == 0)
4461 panic("softdep_update_inodeblock: update failed");
4462}
4463
4464/*
4465 * Merge the new inode dependency list (id_newinoupdt) into the old
4466 * inode dependency list (id_inoupdt). This routine must be called
4467 * with splbio interrupts blocked.
4468 */
4469STATIC void
4470merge_inode_lists(struct inodedep *inodedep)
4471{
4472 struct allocdirect *listadp, *newadp;
4473
4474 splassert(IPL_BIO);
4475
4476 newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)((&inodedep->id_newinoupdt)->tqh_first);
4477 for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); listadp && newadp;) {
4478 if (listadp->ad_lbn < newadp->ad_lbn) {
4479 listadp = TAILQ_NEXT(listadp, ad_next)((listadp)->ad_next.tqe_next);
4480 continue;
4481 }
4482 TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
4483 TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
4484 if (listadp->ad_lbn == newadp->ad_lbn) {
4485 allocdirect_merge(&inodedep->id_inoupdt, newadp,
4486 listadp);
4487 listadp = newadp;
4488 }
4489 newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)((&inodedep->id_newinoupdt)->tqh_first);
4490 }
4491 TAILQ_CONCAT(&inodedep->id_inoupdt, &inodedep->id_newinoupdt, ad_next);
4492}
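
[Editor's note] merge_inode_lists() above is a classic merge of two lbn-sorted lists, with the twist that colliding lbns are combined via allocdirect_merge(). A self-contained sketch of the ordering logic using plain arrays; collisions are simply kept adjacent here:

#include <stdio.h>

/* Merge two ascending lbn arrays into out; returns the merged count. */
int
merge_lbns(const int *a, int na, const int *b, int nb, int *out)
{
	int i = 0, j = 0, n = 0;

	while (i < na && j < nb)
		out[n++] = (a[i] <= b[j]) ? a[i++] : b[j++];
	while (i < na)
		out[n++] = a[i++];
	while (j < nb)
		out[n++] = b[j++];
	return (n);
}

int
main(void)
{
	int cur[] = { 0, 2, 5 }, fresh[] = { 1, 2, 7 }, out[6];
	int i, n = merge_lbns(cur, 3, fresh, 3, out);

	for (i = 0; i < n; i++)
		printf("%d ", out[i]);	/* prints: 0 1 2 2 5 7 */
	printf("\n");
	return (0);
}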
4493
4494/*
4495 * If we are doing an fsync, then we must ensure that any directory
4496 * entries for the inode have been written after the inode gets to disk.
4497 */
4498/* the "in_core" copy of the inode */
4499int
4500softdep_fsync(struct vnode *vp)
4501{
4502 struct inodedep *inodedep;
4503 struct pagedep *pagedep;
4504 struct worklist *wk;
4505 struct diradd *dap;
4506 struct mount *mnt;
4507 struct vnode *pvp;
4508 struct inode *ip;
4509 struct inode *pip;
4510 struct buf *bp;
4511 struct fs *fs;
4512 struct proc *p = CURPROC; /* XXX */
4513 int error, flushparent;
4514 ufsino_t parentino;
4515 daddr_t lbn;
4516
4517 ip = VTOI(vp)((struct inode *)(vp)->v_data);
4518 fs = ip->i_fsinode_u.fs;
4519 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
4520 if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) {
4521 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4522 return (0);
4523 }
4524 if (LIST_FIRST(&inodedep->id_inowait)((&inodedep->id_inowait)->lh_first) != NULL((void *)0) ||
4525 LIST_FIRST(&inodedep->id_bufwait)((&inodedep->id_bufwait)->lh_first) != NULL((void *)0) ||
4526 TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) != NULL((void *)0) ||
4527 TAILQ_FIRST(&inodedep->id_newinoupdt)((&inodedep->id_newinoupdt)->tqh_first) != NULL((void *)0)) {
4528 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4529 panic("softdep_fsync: pending ops");
4530 }
4531 for (error = 0, flushparent = 0; ; ) {
Although the value stored to 'flushparent' is used in the enclosing expression, the value is never actually read from 'flushparent'
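
The store flagged here is indeed dead: every read of flushparent (the test guarding UFS_UPDATE at line 4596) is preceded on the same iteration by the unconditional assignment at lines 4555-4559, and the paths that leave the loop before that assignment never read the variable. A minimal fix, offered as a sketch rather than as the change the tree actually took, is to drop the initializer and let the in-loop assignment be the only store:

    -	for (error = 0, flushparent = 0; ; ) {
    +	for (error = 0; ; ) {

If a compiler then warns that flushparent may be used uninitialized, narrowing the variable's scope to the loop body (declaring it just inside the for) makes the definite assignment obvious to both tools and readers.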
4532         if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
4533             break;
4534         if (wk->wk_type != D_DIRADD) {
4535             FREE_LOCK(&lk);
4536             panic("softdep_fsync: Unexpected type %s",
4537                 TYPENAME(wk->wk_type));
4538         }
4539         dap = WK_DIRADD(wk);
4540         /*
4541          * Flush our parent if this directory entry has a MKDIR_PARENT
4542          * dependency or is contained in a newly allocated block.
4543          */
4544         if (dap->da_state & DIRCHG)
4545             pagedep = dap->da_previous->dm_pagedep;
4546         else
4547             pagedep = dap->da_pagedep;
4548         mnt = pagedep->pd_mnt;
4549         parentino = pagedep->pd_ino;
4550         lbn = pagedep->pd_lbn;
4551         if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) {
4552             FREE_LOCK(&lk);
4553             panic("softdep_fsync: dirty");
4554         }
4555         if ((dap->da_state & MKDIR_PARENT) ||
4556             (pagedep->pd_state & NEWBLOCK))
4557             flushparent = 1;
4558         else
4559             flushparent = 0;
4560         /*
4561          * If we are being fsync'ed as part of vgone'ing this vnode,
4562          * then we will not be able to release and recover the
4563          * vnode below, so we just have to give up on writing its
4564          * directory entry out. It will eventually be written, just
4565          * not now, but then the user was not asking to have it
4566          * written, so we are not breaking any promises.
4567          */
4568         mtx_enter(&vnode_mtx);
4569         if (vp->v_lflag & VXLOCK) {
4570             mtx_leave(&vnode_mtx);
4571             break;
4572         }
4573         mtx_leave(&vnode_mtx);
4574         /*
4575          * We prevent deadlock by always fetching inodes from the
4576          * root, moving down the directory tree. Thus, when fetching
4577          * our parent directory, we must unlock ourselves before
4578          * requesting the lock on our parent. See the comment in
4579          * ufs_lookup for details on possible races.
4580          */
4581         FREE_LOCK(&lk);
4582         VOP_UNLOCK(vp);
4583         error = VFS_VGET(mnt, parentino, &pvp);
4584         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4585         if (error != 0)
4586             return (error);
4587         /*
4588          * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
4589          * that are contained in direct blocks will be resolved by
4590          * doing a UFS_UPDATE. Pagedeps contained in indirect blocks
4591          * may require a complete sync'ing of the directory. So, we
4592          * try the cheap and fast UFS_UPDATE first, and if that fails,
4593          * then we do the slower VOP_FSYNC of the directory.
4594          */
4595         pip = VTOI(pvp);
4596         if (flushparent) {
4597             error = UFS_UPDATE(pip, 1);
4598             if (error) {
4599                 vput(pvp);
4600                 return (error);
4601             }
4602             if (pagedep->pd_state & NEWBLOCK) {
4603                 error = VOP_FSYNC(pvp, p->p_ucred, MNT_WAIT, p);
4604                 if (error) {
4605                     vput(pvp);
4606                     return (error);
4607                 }
4608             }
4609         }
4610         /*
4611          * Flush directory page containing the inode's name.
4612          */
4613         error = bread(pvp, lbn, fs->fs_bsize, &bp);
4614         if (error == 0) {
4615             bp->b_bcount = blksize(fs, pip, lbn);
4616             error = bwrite(bp);
4617         } else
4618             brelse(bp);
4619         vput(pvp);
4620         if (error != 0)
4621             return (error);
4622         ACQUIRE_LOCK(&lk);
4623         if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0)
4624             break;
4625     }
4626     FREE_LOCK(&lk);
4627     return (0);
4628 }
4629
4630 /*
4631  * Flush all the dirty bitmaps associated with the block device
4632  * before flushing the rest of the dirty blocks so as to reduce
4633  * the number of dependencies that will have to be rolled back.
4634  */
4635 void
4636 softdep_fsync_mountdev(struct vnode *vp, int waitfor)
4637 {
4638     struct buf *bp, *nbp;
4639     struct worklist *wk;
4640
4641     if (!vn_isdisk(vp, NULL))
4642         panic("softdep_fsync_mountdev: vnode not a disk");
4643     ACQUIRE_LOCK(&lk);
4644     LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
4645         /*
4646          * If it is already scheduled, skip to the next buffer.
4647          */
4648         splassert(IPL_BIO);
4649         if (bp->b_flags & B_BUSY)
4650             continue;
4651
4652         if ((bp->b_flags & B_DELWRI) == 0) {
4653             FREE_LOCK(&lk);
4654             panic("softdep_fsync_mountdev: not dirty");
4655         }
4656         /*
4657          * We are only interested in bitmaps with outstanding
4658          * dependencies.
4659          */
4660         if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
4661             wk->wk_type != D_BMSAFEMAP) {
4662             continue;
4663         }
4664         bremfree(bp);
4665         buf_acquire(bp);
4666         FREE_LOCK(&lk);
4667         (void) bawrite(bp);
4668         ACQUIRE_LOCK(&lk);
4669         /*
4670          * Since we may have slept during the I/O, we need
4671          * to start from a known point.
4672          */
4673         nbp = LIST_FIRST(&vp->v_dirtyblkhd);
4674     }
4675     if (waitfor == MNT_WAIT)
4676         drain_output(vp, 1);
4677     FREE_LOCK(&lk);
4678 }
4679
4680 /*
4681  * This routine is called when we are trying to synchronously flush a
4682  * file. This routine must eliminate any filesystem metadata dependencies
4683  * so that the syncing routine can succeed by pushing the dirty blocks
4684  * associated with the file. If any I/O errors occur, they are returned.
4685  */
4686 int
4687 softdep_sync_metadata(struct vop_fsync_args *ap)
4688 {
4689     struct vnode *vp = ap->a_vp;
4690     struct pagedep *pagedep;
4691     struct allocdirect *adp;
4692     struct allocindir *aip;
4693     struct buf *bp, *nbp;
4694     struct worklist *wk;
4695     int i, gotit, error, waitfor;
4696
4697     /*
4698      * Check whether this vnode is involved in a filesystem
4699      * that is doing soft dependency processing.
4700      */
4701     if (!vn_isdisk(vp, NULL)) {
4702         if (!DOINGSOFTDEP(vp))
4703             return (0);
4704     } else
4705         if (vp->v_specmountpoint == NULL ||
4706             (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP) == 0)
4707             return (0);
4708     /*
4709      * Ensure that any direct block dependencies have been cleared.
4710      */
4711     ACQUIRE_LOCK(&lk);
4712     if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) {
4713         FREE_LOCK(&lk);
4714         return (error);
4715     }
4716     /*
4717      * For most files, the only metadata dependencies are the
4718      * cylinder group maps that allocate their inode or blocks.
4719      * The block allocation dependencies can be found by traversing
4720      * the dependency lists for any buffers that remain on their
4721      * dirty buffer list. The inode allocation dependency will
4722      * be resolved when the inode is updated with MNT_WAIT.
4723      * This work is done in two passes. The first pass grabs most
4724      * of the buffers and begins asynchronously writing them. The
4725      * only way to wait for these asynchronous writes is to sleep
4726      * on the filesystem vnode which may stay busy for a long time
4727      * if the filesystem is active. So, instead, we make a second
4728      * pass over the dependencies blocking on each write. In the
4729      * usual case we will be blocking against a write that we
4730      * initiated, so when it is done the dependency will have been
4731      * resolved. Thus the second pass is expected to end quickly.
4732      */
4733     waitfor = MNT_NOWAIT;
4734 top:
4735     /*
4736      * We must wait for any I/O in progress to finish so that
4737      * all potential buffers on the dirty list will be visible.
4738      */
4739     drain_output(vp, 1);
4740     bp = LIST_FIRST(&vp->v_dirtyblkhd);
4741     gotit = getdirtybuf(bp, MNT_WAIT);
4742     if (gotit == 0) {
4743         FREE_LOCK(&lk);
4744         return (0);
4745     } else if (gotit == -1)
4746         goto top;
4747 loop:
4748     /*
4749      * As we hold the buffer locked, none of its dependencies
4750      * will disappear.
4751      */
4752     LIST_FOREACH(wk, &bp->b_dep, wk_list) {
4753         switch (wk->wk_type) {
4754
4755         case D_ALLOCDIRECT:
4756             adp = WK_ALLOCDIRECT(wk);
4757             if (adp->ad_state & DEPCOMPLETE)
4758                 break;
4759             nbp = adp->ad_buf;
4760             gotit = getdirtybuf(nbp, waitfor);
4761             if (gotit == 0)
4762                 break;
4763             else if (gotit == -1)
4764                 goto loop;
4765             FREE_LOCK(&lk);
4766             if (waitfor == MNT_NOWAIT) {
4767                 bawrite(nbp);
4768             } else if ((error = VOP_BWRITE(nbp)) != 0) {
4769                 bawrite(bp);
4770                 return (error);
4771             }
4772             ACQUIRE_LOCK(&lk);
4773             break;
4774
4775         case D_ALLOCINDIR:
4776             aip = WK_ALLOCINDIR(wk);
4777             if (aip->ai_state & DEPCOMPLETE)
4778                 break;
4779             nbp = aip->ai_buf;
4780             gotit = getdirtybuf(nbp, waitfor);
4781             if (gotit == 0)
4782                 break;
4783             else if (gotit == -1)
4784                 goto loop;
4785             FREE_LOCK(&lk);
4786             if (waitfor == MNT_NOWAIT) {
4787                 bawrite(nbp);
4788             } else if ((error = VOP_BWRITE(nbp)) != 0) {
4789                 bawrite(bp);
4790                 return (error);
4791             }
4792             ACQUIRE_LOCK(&lk);
4793             break;
4794
4795         case D_INDIRDEP:
4796         restart:
4797
4798             LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
4799                 if (aip->ai_state & DEPCOMPLETE)
4800                     continue;
4801                 nbp = aip->ai_buf;
4802                 if (getdirtybuf(nbp, MNT_WAIT) <= 0)
4803                     goto restart;
4804                 FREE_LOCK(&lk);
4805                 if ((error = VOP_BWRITE(nbp)) != 0) {
4806                     bawrite(bp);
4807                     return (error);
4808                 }
4809                 ACQUIRE_LOCK(&lk);
4810                 goto restart;
4811             }
4812             break;
4813
4814         case D_INODEDEP:
4815             if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs,
4816                 WK_INODEDEP(wk)->id_ino)) != 0) {
4817                 FREE_LOCK(&lk);
4818                 bawrite(bp);
4819                 return (error);
4820             }
4821             break;
4822
4823         case D_PAGEDEP:
4824             /*
4825              * We are trying to sync a directory that may
4826              * have dependencies on both its own metadata
4827              * and/or dependencies on the inodes of any
4828              * recently allocated files. We walk its diradd
4829              * lists pushing out the associated inode.
4830              */
4831             pagedep = WK_PAGEDEP(wk);
4832             for (i = 0; i < DAHASHSZ; i++) {
4833                 if (LIST_FIRST(&pagedep->pd_diraddhd[i]) ==
4834                     NULL)
4835                     continue;
4836                 if ((error =
4837                     flush_pagedep_deps(vp, pagedep->pd_mnt,
4838                     &pagedep->pd_diraddhd[i]))) {
4839                     FREE_LOCK(&lk);
4840                     bawrite(bp);
4841                     return (error);
4842                 }
4843             }
4844             break;
4845
4846         case D_MKDIR:
4847             /*
4848              * This case should never happen if the vnode has
4849              * been properly sync'ed. However, if this function
4850              * is used at a place where the vnode has not yet
4851              * been sync'ed, this dependency can show up. So,
4852              * rather than panic, just flush it.
4853              */
4854             nbp = WK_MKDIR(wk)->md_buf;
4855             KASSERT(bp != nbp);
4856             gotit = getdirtybuf(nbp, waitfor);
4857             if (gotit == 0)
4858                 break;
4859             else if (gotit == -1)
4860                 goto loop;
4861             FREE_LOCK(&lk);
4862             if (waitfor == MNT_NOWAIT) {
4863                 bawrite(nbp);
4864             } else if ((error = VOP_BWRITE(nbp)) != 0) {
4865                 bawrite(bp);
4866                 return (error);
4867             }
4868             ACQUIRE_LOCK(&lk);
4869             break;
4870
4871         case D_BMSAFEMAP:
4872             /*
4873              * This case should never happen if the vnode has
4874              * been properly sync'ed. However, if this function
4875              * is used at a place where the vnode has not yet
4876              * been sync'ed, this dependency can show up. So,
4877              * rather than panic, just flush it.
4878              */
4879             nbp = WK_BMSAFEMAP(wk)->sm_buf;
4880             if (bp == nbp)
4881                 break;
4882             gotit = getdirtybuf(nbp, waitfor);
4883             if (gotit == 0)
4884                 break;
4885             else if (gotit == -1)
4886                 goto loop;
4887             FREE_LOCK(&lk);
4888             if (waitfor == MNT_NOWAIT) {
4889                 bawrite(nbp);
4890             } else if ((error = VOP_BWRITE(nbp)) != 0) {
4891                 bawrite(bp);
4892                 return (error);
4893             }
4894             ACQUIRE_LOCK(&lk);
4895             break;
4896
4897         default:
4898             FREE_LOCK(&lk);
4899             panic("softdep_sync_metadata: Unknown type %s",
4900                 TYPENAME(wk->wk_type));
4901             /* NOTREACHED */
4902         }
4903     }
4904     do {
4905         nbp = LIST_NEXT(bp, b_vnbufs);
4906         gotit = getdirtybuf(nbp, MNT_WAIT);
4907     } while (gotit == -1);
4908     FREE_LOCK(&lk);
4909     bawrite(bp);
4910     ACQUIRE_LOCK(&lk);
4911     if (nbp != NULL) {
4912         bp = nbp;
4913         goto loop;
4914     }
4915     /*
4916      * The brief unlock is to allow any pent up dependency
4917      * processing to be done. Then proceed with the second pass.
4918      */
4919     if (waitfor == MNT_NOWAIT) {
4920         waitfor = MNT_WAIT;
4921         FREE_LOCK(&lk);
4922         ACQUIRE_LOCK(&lk);
4923         goto top;
4924     }
4925
4926     /*
4927      * If we have managed to get rid of all the dirty buffers,
4928      * then we are done. For certain directories and block
4929      * devices, we may need to do further work.
4930      *
4931      * We must wait for any I/O in progress to finish so that
4932      * all potential buffers on the dirty list will be visible.
4933      */
4934     drain_output(vp, 1);
4935     if (LIST_EMPTY(&vp->v_dirtyblkhd)) {
4936         FREE_LOCK(&lk);
4937         return (0);
4938     }
4939
4940     FREE_LOCK(&lk);
4941     /*
4942      * If we are trying to sync a block device, some of its buffers may
4943      * contain metadata that cannot be written until the contents of some
4944      * partially written files have been written to disk. The only easy
4945      * way to accomplish this is to sync the entire filesystem (luckily
4946      * this happens rarely).
4947      */
4948     if (vn_isdisk(vp, NULL) &&
4949         vp->v_specmountpoint && !VOP_ISLOCKED(vp) &&
4950         (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, 0, ap->a_cred,
4951         ap->a_p)) != 0)
4952         return (error);
4953     return (0);
4954 }
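
The two-pass structure described in the comment at lines 4723-4731 recurs in flush_inodedep_deps() below. Reduced to its skeleton it looks like the following sketch; struct dep, start_write() and wait_write() are hypothetical names standing in for the buffer lists and for bawrite()/VOP_BWRITE(), not kernel interfaces:

    /* Hypothetical dependency record, for illustration only. */
    struct dep {
        struct dep *next;
        int complete;
    };

    /* Assumed helpers: start an async write / block until it is done. */
    void start_write(struct dep *);     /* like bawrite()           */
    int  wait_write(struct dep *);      /* like a blocking bwrite() */

    static int
    flush_two_pass(struct dep *head)
    {
        struct dep *d;
        int error, blocking = 0;        /* pass 1: async, pass 2: blocking */

        for (;;) {
            for (d = head; d != NULL; d = d->next) {
                if (d->complete)
                    continue;
                if (!blocking)
                    start_write(d);
                else if ((error = wait_write(d)) != 0)
                    return (error);
            }
            if (blocking)
                return (0);             /* pass 2 finished */
            blocking = 1;               /* pass 1 seeded the I/O; now wait */
        }
    }

The point of the shape is that pass 2 almost always blocks on writes pass 1 already started, so the blocking pass is short even though each wait is synchronous.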
4955
4956 /*
4957  * Flush the dependencies associated with an inodedep.
4958  * Called with splbio blocked.
4959  */
4960 STATIC int
4961 flush_inodedep_deps(struct fs *fs, ufsino_t ino)
4962 {
4963     struct inodedep *inodedep;
4964     struct allocdirect *adp;
4965     int gotit, error, waitfor;
4966     struct buf *bp;
4967
4968     splassert(IPL_BIO);
4969
4970     /*
4971      * This work is done in two passes. The first pass grabs most
4972      * of the buffers and begins asynchronously writing them. The
4973      * only way to wait for these asynchronous writes is to sleep
4974      * on the filesystem vnode which may stay busy for a long time
4975      * if the filesystem is active. So, instead, we make a second
4976      * pass over the dependencies blocking on each write. In the
4977      * usual case we will be blocking against a write that we
4978      * initiated, so when it is done the dependency will have been
4979      * resolved. Thus the second pass is expected to end quickly.
4980      * We give a brief window at the top of the loop to allow
4981      * any pending I/O to complete.
4982      */
4983     for (waitfor = MNT_NOWAIT; ; ) {
4984     retry_ino:
4985         FREE_LOCK(&lk);
4986         ACQUIRE_LOCK(&lk);
4987         if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
4988             return (0);
4989         TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
4990             if (adp->ad_state & DEPCOMPLETE)
4991                 continue;
4992             bp = adp->ad_buf;
4993             gotit = getdirtybuf(bp, waitfor);
4994             if (gotit == 0) {
4995                 if (waitfor == MNT_NOWAIT)
4996                     continue;
4997                 break;
4998             } else if (gotit == -1)
4999                 goto retry_ino;
5000             FREE_LOCK(&lk);
5001             if (waitfor == MNT_NOWAIT) {
5002                 bawrite(bp);
5003             } else if ((error = VOP_BWRITE(bp)) != 0) {
5004                 ACQUIRE_LOCK(&lk);
5005                 return (error);
5006             }
5007             ACQUIRE_LOCK(&lk);
5008             break;
5009         }
5010         if (adp != NULL)
5011             continue;
5012     retry_newino:
5013         TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
5014             if (adp->ad_state & DEPCOMPLETE)
5015                 continue;
5016             bp = adp->ad_buf;
5017             gotit = getdirtybuf(bp, waitfor);
5018             if (gotit == 0) {
5019                 if (waitfor == MNT_NOWAIT)
5020                     continue;
5021                 break;
5022             } else if (gotit == -1)
5023                 goto retry_newino;
5024             FREE_LOCK(&lk);
5025             if (waitfor == MNT_NOWAIT) {
5026                 bawrite(bp);
5027             } else if ((error = VOP_BWRITE(bp)) != 0) {
5028                 ACQUIRE_LOCK(&lk);
5029                 return (error);
5030             }
5031             ACQUIRE_LOCK(&lk);
5032             break;
5033         }
5034         if (adp != NULL)
5035             continue;
5036         /*
5037          * If this was pass 2, we are done; otherwise go do pass 2.
5038          */
5039         if (waitfor == MNT_WAIT)
5040             break;
5041         waitfor = MNT_WAIT;
5042     }
5043     /*
5044      * Try freeing inodedep in case all dependencies have been removed.
5045      */
5046     if (inodedep_lookup(fs, ino, 0, &inodedep) != 0)
5047         (void) free_inodedep(inodedep);
5048     return (0);
5049 }
5050
5051 /*
5052  * Eliminate a pagedep dependency by flushing out all its diradd dependencies.
5053  * Called with splbio blocked.
5054  */
5055 STATIC int
5056 flush_pagedep_deps(struct vnode *pvp, struct mount *mp,
5057     struct diraddhd *diraddhdp)
5058 {
5059     struct proc *p = CURPROC;   /* XXX */
5060     struct worklist *wk;
5061     struct inodedep *inodedep;
5062     struct ufsmount *ump;
5063     struct diradd *dap;
5064     struct vnode *vp;
5065     int gotit, error = 0;
5066     struct buf *bp;
5067     ufsino_t inum;
5068
5069     splassert(IPL_BIO);
5070
5071     ump = VFSTOUFS(mp);
5072     while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
5073         /*
5074          * Flush ourselves if this directory entry
5075          * has a MKDIR_PARENT dependency.
5076          */
5077         if (dap->da_state & MKDIR_PARENT) {
5078             FREE_LOCK(&lk);
5079             if ((error = UFS_UPDATE(VTOI(pvp), 1)))
5080                 break;
5081             ACQUIRE_LOCK(&lk);
5082             /*
5083              * If that cleared dependencies, go on to next.
5084              */
5085             if (dap != LIST_FIRST(diraddhdp))
5086                 continue;
5087             if (dap->da_state & MKDIR_PARENT) {
5088                 FREE_LOCK(&lk);
5089                 panic("flush_pagedep_deps: MKDIR_PARENT");
5090             }
5091         }
5092         /*
5093          * A newly allocated directory must have its "." and
5094          * ".." entries written out before its name can be
5095          * committed in its parent. We do not want or need
5096          * the full semantics of a synchronous VOP_FSYNC as
5097          * that may end up here again, once for each directory
5098          * level in the filesystem. Instead, we push the blocks
5099          * and wait for them to clear. We have to fsync twice
5100          * because the first call may choose to defer blocks
5101          * that still have dependencies, but deferral will
5102          * happen at most once.
5103          */
5104         inum = dap->da_newinum;
5105         if (dap->da_state & MKDIR_BODY) {
5106             FREE_LOCK(&lk);
5107             if ((error = VFS_VGET(mp, inum, &vp)) != 0)
5108                 break;
5109             if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)) ||
5110                 (error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
5111                 vput(vp);
5112                 break;
5113             }
5114             drain_output(vp, 0);
5115             /*
5116              * If first block is still dirty with a D_MKDIR
5117              * dependency then it needs to be written now.
5118              */
5119             for (;;) {
5120                 error = 0;
5121                 ACQUIRE_LOCK(&lk);
5122                 bp = incore(vp, 0);
5123                 if (bp == NULL) {
5124                     FREE_LOCK(&lk);
5125                     break;
5126                 }
5127                 LIST_FOREACH(wk, &bp->b_dep, wk_list)
5128                     if (wk->wk_type == D_MKDIR)
5129                         break;
5130                 if (wk) {
5131                     gotit = getdirtybuf(bp, MNT_WAIT);
5132                     FREE_LOCK(&lk);
5133                     if (gotit == -1)
5134                         continue;
5135                     if (gotit && (error = bwrite(bp)) != 0)
5136                         break;
5137                 } else
5138                     FREE_LOCK(&lk);
5139                 break;
5140             }
5141             vput(vp);
5142             /* Flushing of first block failed */
5143             if (error)
5144                 break;
5145             ACQUIRE_LOCK(&lk);
5146             /*
5147              * If that cleared dependencies, go on to next.
5148              */
5149             if (dap != LIST_FIRST(diraddhdp))
5150                 continue;
5151             if (dap->da_state & MKDIR_BODY) {
5152                 FREE_LOCK(&lk);
5153                 panic("flush_pagedep_deps: MKDIR_BODY");
5154             }
5155         }
5156         /*
5157          * Flush the inode on which the directory entry depends.
5158          * Having accounted for MKDIR_PARENT and MKDIR_BODY above,
5159          * the only remaining dependency is that the updated inode
5160          * count must get pushed to disk. The inode has already
5161          * been pushed into its inode buffer (via VOP_UPDATE) at
5162          * the time of the reference count change. So we need only
5163          * locate that buffer, ensure that there will be no rollback
5164          * caused by a bitmap dependency, then write the inode buffer.
5165          */
5166         if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0) {
5167             FREE_LOCK(&lk);
5168             panic("flush_pagedep_deps: lost inode");
5169         }
5170         /*
5171          * If the inode still has bitmap dependencies,
5172          * push them to disk.
5173          */
5174     retry:
5175         if ((inodedep->id_state & DEPCOMPLETE) == 0) {
5176             bp = inodedep->id_buf;
5177             gotit = getdirtybuf(bp, MNT_WAIT);
5178             if (gotit == -1)
5179                 goto retry;
5180             FREE_LOCK(&lk);
5181             if (gotit && (error = bwrite(bp)) != 0)
5182                 break;
5183             ACQUIRE_LOCK(&lk);
5184             if (dap != LIST_FIRST(diraddhdp))
5185                 continue;
5186         }
5187         /*
5188          * If the inode is still sitting in a buffer waiting
5189          * to be written, push it to disk.
5190          */
5191         FREE_LOCK(&lk);
5192         if ((error = bread(ump->um_devvp,
5193             fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
5194             (int)ump->um_fs->fs_bsize, &bp)) != 0) {
5195             brelse(bp);
5196             break;
5197         }
5198         if ((error = bwrite(bp)) != 0)
5199             break;
5200         ACQUIRE_LOCK(&lk);
5201         /*
5202          * If we have failed to get rid of all the dependencies
5203          * then something is seriously wrong.
5204          */
5205         if (dap == LIST_FIRST(diraddhdp)) {
5206             FREE_LOCK(&lk);
5207             panic("flush_pagedep_deps: flush failed");
5208         }
5209     }
5210     if (error)
5211         ACQUIRE_LOCK(&lk);
5212     return (error);
5213 }
5214
5215 /*
5216  * A large burst of file addition or deletion activity can drive the
5217  * memory load excessively high. First attempt to slow things down
5218  * using the techniques below. If that fails, this routine requests
5219  * the offending operations to fall back to running synchronously
5220  * until the memory load returns to a reasonable level.
5221  */
5222 int
5223 softdep_slowdown(struct vnode *vp)
5224 {
5225     int max_softdeps_hard;
5226
5227     max_softdeps_hard = max_softdeps * 11 / 10;
5228     if (num_dirrem < max_softdeps_hard / 2 &&
5229         num_inodedep < max_softdeps_hard)
5230         return (0);
5231     stat_sync_limit_hit += 1;
5232     return (1);
5233 }
5234
5235 /*
5236  * If memory utilization has gotten too high, deliberately slow things
5237  * down and speed up the I/O processing.
5238  */
5239 STATIC int
5240 request_cleanup(int resource, int islocked)
5241 {
5242     struct proc *p = CURPROC;
5243     int s;
5244
5245     /*
5246      * We never hold up the filesystem syncer process.
5247      */
5248     if (p == filesys_syncer || (p->p_flag & P_SOFTDEP))
5249         return (0);
5250     /*
5251      * First check to see if the work list has gotten backlogged.
5252      * If it has, co-opt this process to help clean up two entries.
5253      * Because this process may hold inodes locked, we cannot
5254      * handle any remove requests that might block on a locked
5255      * inode as that could lead to deadlock. We set P_SOFTDEP
5256      * to avoid recursively processing the worklist.
5257      */
5258     if (num_on_worklist > max_softdeps / 10) {
5259         atomic_setbits_int(&p->p_flag, P_SOFTDEP);
5260         if (islocked)
5261             FREE_LOCK(&lk);
5262         process_worklist_item(NULL, NULL, LK_NOWAIT);
5263         process_worklist_item(NULL, NULL, LK_NOWAIT);
5264         atomic_clearbits_int(&p->p_flag, P_SOFTDEP);
5265         stat_worklist_push += 2;
5266         if (islocked)
5267             ACQUIRE_LOCK(&lk);
5268         return (1);
5269     }
5270     /*
5271      * Next, we attempt to speed up the syncer process. If that
5272      * is successful, then we allow the process to continue.
5273      */
5274     if (speedup_syncer())
5275         return (0);
5276     /*
5277      * If we are resource constrained on inode dependencies, try
5278      * flushing some dirty inodes. Otherwise, we are constrained
5279      * by file deletions, so try accelerating flushes of directories
5280      * with removal dependencies. We would like to do the cleanup
5281      * here, but we probably hold an inode locked at this point and
5282      * that might deadlock against one that we try to clean. So,
5283      * the best that we can do is request the syncer daemon to do
5284      * the cleanup for us.
5285      */
5286     switch (resource) {
5287
5288     case FLUSH_INODES:
5289         stat_ino_limit_push += 1;
5290         req_clear_inodedeps += 1;
5291         stat_countp = &stat_ino_limit_hit;
5292         break;
5293
5294     case FLUSH_REMOVE:
5295         stat_blk_limit_push += 1;
5296         req_clear_remove += 1;
5297         stat_countp = &stat_blk_limit_hit;
5298         break;
5299
5300     default:
5301         if (islocked)
5302             FREE_LOCK(&lk);
5303         panic("request_cleanup: unknown type");
5304     }
5305     /*
5306      * Hopefully the syncer daemon will catch up and awaken us.
5307      * We wait at most tickdelay before proceeding in any case.
5308      */
5309     if (islocked == 0)
5310         ACQUIRE_LOCK(&lk);
5311     proc_waiting += 1;
5312     if (!timeout_pending(&proc_waiting_timeout))
5313         timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
5314
5315     s = FREE_LOCK_INTERLOCKED(&lk);
5316     tsleep_nsec(&proc_waiting, PPAUSE, "softupdate", INFSLP);
5317     ACQUIRE_LOCK_INTERLOCKED(&lk, s);
5318     proc_waiting -= 1;
5319     if (islocked == 0)
5320         FREE_LOCK(&lk);
5321     return (1);
5322 }
5323
5324 /*
5325  * Awaken processes pausing in request_cleanup and clear proc_waiting
5326  * to indicate that there is no longer a timer running.
5327  */
5328 void
5329 pause_timer(void *arg)
5330 {
5331
5332     *stat_countp += 1;
5333     wakeup_one(&proc_waiting);
5334     if (proc_waiting > 0)
5335         timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
5336 }
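
request_cleanup() and pause_timer() form a bounded-wait handshake: the waiter bumps proc_waiting, arms a one-shot timeout, and sleeps; the timer wakes a waiter and re-arms itself only while waiters remain. A userland analogue of that shape, shown only as a sketch (the mutex/condvar names are hypothetical; the kernel code above uses tsleep_nsec/wakeup_one with the spl interlock instead), folds the timeout and the wakeup into one call:

    #include <pthread.h>
    #include <time.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cv = PTHREAD_COND_INITIALIZER;
    static int waiters;

    /* Wait for a wakeup, but never longer than delay_ns, then recheck. */
    static void
    bounded_wait(long delay_ns)
    {
        struct timespec ts;

        clock_gettime(CLOCK_REALTIME, &ts);
        ts.tv_nsec += delay_ns;
        ts.tv_sec += ts.tv_nsec / 1000000000L;
        ts.tv_nsec %= 1000000000L;

        pthread_mutex_lock(&m);
        waiters++;
        pthread_cond_timedwait(&cv, &m, &ts);   /* wakeup or timeout */
        waiters--;
        pthread_mutex_unlock(&m);
    }

The kernel cannot use a combined primitive here, which is why the timeout and the sleep channel are managed as two separate pieces of state.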
5337
5338 /*
5339  * Flush out a directory with at least one removal dependency in an effort to
5340  * reduce the number of dirrem, freefile, and freeblks dependency structures.
5341  */
5342 STATIC void
5343 clear_remove(struct proc *p)
5344 {
5345     struct pagedep_hashhead *pagedephd;
5346     struct pagedep *pagedep;
5347     static int next = 0;
5348     struct mount *mp;
5349     struct vnode *vp;
5350     int error, cnt;
5351     ufsino_t ino;
5352
5353     ACQUIRE_LOCK(&lk);
5354     for (cnt = 0; cnt <= pagedep_hash; cnt++) {
5355         pagedephd = &pagedep_hashtbl[next++];
5356         if (next > pagedep_hash)
5357             next = 0;
5358         LIST_FOREACH(pagedep, pagedephd, pd_hash) {
5359             if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL)
5360                 continue;
5361             mp = pagedep->pd_mnt;
5362             ino = pagedep->pd_ino;
5363 #if 0
5364             if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
5365                 continue;
5366 #endif
5367             FREE_LOCK(&lk);
5368             if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
5369                 softdep_error("clear_remove: vget", error);
5370 #if 0
5371                 vn_finished_write(mp);
5372 #endif
5373                 return;
5374             }
5375             if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
5376                 softdep_error("clear_remove: fsync", error);
5377             drain_output(vp, 0);
5378             vput(vp);
5379 #if 0
5380             vn_finished_write(mp);
5381 #endif
5382             return;
5383         }
5384     }
5385     FREE_LOCK(&lk);
5386 }
5387
5388 /*
5389  * Clear out a block of dirty inodes in an effort to reduce
5390  * the number of inodedep dependency structures.
5391  */
5392 STATIC void
5393 clear_inodedeps(struct proc *p)
5394 {
5395     struct inodedep_hashhead *inodedephd;
5396     struct inodedep *inodedep = NULL;
5397     static int next = 0;
5398     struct mount *mp;
5399     struct vnode *vp;
5400     struct fs *fs;
5401     int error, cnt;
5402     ufsino_t firstino, lastino, ino;
5403
5404     ACQUIRE_LOCK(&lk);
5405     /*
5406      * Pick a random inode dependency to be cleared.
5407      * We will then gather up all the inodes in its block
5408      * that have dependencies and flush them out.
5409      */
5410     for (cnt = 0; cnt <= inodedep_hash; cnt++) {
5411         inodedephd = &inodedep_hashtbl[next++];
5412         if (next > inodedep_hash)
5413             next = 0;
5414         if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
5415             break;
5416     }
5417     if (inodedep == NULL) {
5418         FREE_LOCK(&lk);
5419         return;
5420     }
5421     /*
5422      * Ugly code to find mount point given pointer to superblock.
5423      */
5424     fs = inodedep->id_fs;
5425     TAILQ_FOREACH(mp, &mountlist, mnt_list)
5426         if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs)
5427             break;
5428     /*
5429      * Find the last inode in the block with dependencies.
5430      */
5431     firstino = inodedep->id_ino & ~(INOPB(fs) - 1);
5432     for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--)
5433         if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0)
5434             break;
5435     /*
5436      * Asynchronously push all but the last inode with dependencies.
5437      * Synchronously push the last inode with dependencies to ensure
5438      * that the inode block gets written to free up the inodedeps.
5439      */
5440     for (ino = firstino; ino <= lastino; ino++) {
5441         if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
5442             continue;
5443         FREE_LOCK(&lk);
5444 #if 0
5445         if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
5446             continue;
5447 #endif
5448         if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
5449             softdep_error("clear_inodedeps: vget", error);
5450 #if 0
5451             vn_finished_write(mp);
5452 #endif
5453             return;
5454         }
5455         if (ino == lastino) {
5456             if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p)))
5457                 softdep_error("clear_inodedeps: fsync1", error);
5458         } else {
5459             if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
5460                 softdep_error("clear_inodedeps: fsync2", error);
5461             drain_output(vp, 0);
5462         }
5463         vput(vp);
5464 #if 0
5465         vn_finished_write(mp);
5466 #endif
5467         ACQUIRE_LOCK(&lk);
5468     }
5469     FREE_LOCK(&lk);
5470 }
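
The firstino/lastino computation at lines 5431-5434 rounds an inode number down to the first slot of its on-disk inode block, then scans backward from the block's last slot. The mask trick works because INOPB (inodes per block) is a power of two for FFS. For a hypothetical INOPB(fs) of 64 (the real value depends on the filesystem layout):

    /* Assuming INOPB(fs) == 64, for illustration only: */
    ufsino_t ino = 1000;
    ufsino_t firstino = ino & ~(64 - 1);   /* 1000 & ~63 == 960         */
    ufsino_t lastino = firstino + 64 - 1;  /* 1023; scan down toward 960 */

Everything in [firstino, lastino] shares one inode block, so flushing the last dirty inode in the range synchronously is what finally lets that block, and its inodedeps, go.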
5471
5472 /*
5473  * Function to determine if the buffer has outstanding dependencies
5474  * that will cause a roll-back if the buffer is written. If wantcount
5475  * is set, return the number of dependencies; otherwise just yes or no.
5476  */
5477 int
5478 softdep_count_dependencies(struct buf *bp, int wantcount, int islocked)
5479 {
5480     struct worklist *wk;
5481     struct inodedep *inodedep;
5482     struct indirdep *indirdep;
5483     struct allocindir *aip;
5484     struct pagedep *pagedep;
5485     struct diradd *dap;
5486     int i, retval;
5487
5488     retval = 0;
5489     if (!islocked)
5490         ACQUIRE_LOCK(&lk);
5491     LIST_FOREACH(wk, &bp->b_dep, wk_list) {
5492         switch (wk->wk_type) {
5493
5494         case D_INODEDEP:
5495             inodedep = WK_INODEDEP(wk);
5496             if ((inodedep->id_state & DEPCOMPLETE) == 0) {
5497                 /* bitmap allocation dependency */
5498                 retval += 1;
5499                 if (!wantcount)
5500                     goto out;
5501             }
5502             if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
5503                 /* direct block pointer dependency */
5504                 retval += 1;
5505                 if (!wantcount)
5506                     goto out;
5507             }
5508             continue;
5509
5510         case D_INDIRDEP:
5511             indirdep = WK_INDIRDEP(wk);
5512
5513             LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) {
5514                 /* indirect block pointer dependency */
5515                 retval += 1;
5516                 if (!wantcount)
5517                     goto out;
5518             }
5519             continue;
5520
5521         case D_PAGEDEP:
5522             pagedep = WK_PAGEDEP(wk);
5523             for (i = 0; i < DAHASHSZ; i++) {
5524
5525                 LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
5526                     /* directory entry dependency */
5527                     retval += 1;
5528                     if (!wantcount)
5529                         goto out;
5530                 }
5531             }
5532             continue;
5533
5534         case D_BMSAFEMAP:
5535         case D_ALLOCDIRECT:
5536         case D_ALLOCINDIR:
5537         case D_MKDIR:
5538             /* never a dependency on these blocks */
5539             continue;
5540
5541         default:
5542             if (!islocked)
5543                 FREE_LOCK(&lk);
5544             panic("softdep_check_for_rollback: Unexpected type %s",
5545                 TYPENAME(wk->wk_type));
5546             /* NOTREACHED */
5547         }
5548     }
5549 out:
5550     if (!islocked)
5551         FREE_LOCK(&lk);
5552     return retval;
5553 }
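
The wantcount flag selects between a cheap boolean probe and a full count; a sketch of the two calling modes (the caller context and the nodelay/ndeps names are hypothetical):

    /* Boolean probe: the walk bails at the first dependency found. */
    if (softdep_count_dependencies(bp, 0, 0) != 0)
        nodelay = 0;                /* hypothetical caller flag */

    /* Exact count: wantcount != 0 makes the walk visit them all. */
    ndeps = softdep_count_dependencies(bp, 1, 0);

The early-out in the boolean mode matters because the D_INDIRDEP and D_PAGEDEP cases otherwise iterate whole dependency lists under the lock.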
5554
5555 /*
5556  * Acquire exclusive access to a buffer.
5557  * Must be called with splbio blocked.
5558  * Returns:
5559  *	1 if the buffer was acquired and is dirty;
5560  *	0 if the buffer was clean, or we would have slept but had MNT_NOWAIT;
5561  *	-1 if we slept and may try again (but not with this bp).
5562  */
5563 STATIC int
5564 getdirtybuf(struct buf *bp, int waitfor)
5565 {
5566     int s;
5567
5568     if (bp == NULL)
5569         return (0);
5570
5571     splassert(IPL_BIO);
5572
5573     if (bp->b_flags & B_BUSY) {
5574         if (waitfor != MNT_WAIT)
5575             return (0);
5576         bp->b_flags |= B_WANTED;
5577         s = FREE_LOCK_INTERLOCKED(&lk);
5578         tsleep_nsec(bp, PRIBIO + 1, "sdsdty", INFSLP);
5579         ACQUIRE_LOCK_INTERLOCKED(&lk, s);
5580         return (-1);
5581     }
5582     if ((bp->b_flags & B_DELWRI) == 0)
5583         return (0);
5584     bremfree(bp);
5585     buf_acquire(bp);
5586     return (1);
5587 }
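
The -1 return is the subtle case: the caller slept, so any pointer it derived from a list before the call may be stale. The callers above (softdep_update_inodeblock, flush_pagedep_deps, and others) all follow the same retry shape, restated here in isolation; inodedep stands for whatever stable anchor the caller re-fetches from:

    for (;;) {
        bp = inodedep->id_buf;          /* re-fetch after any sleep    */
        gotit = getdirtybuf(bp, MNT_WAIT);
        if (gotit != -1)                /* -1: we slept, state is stale */
            break;
    }
    if (gotit)                          /* 1: buffer is ours and dirty */
        error = bwrite(bp);
    /* 0: clean buffer (or would have slept under MNT_NOWAIT) */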
5588
5589 /*
5590  * Wait for pending output on a vnode to complete.
5591  * Must be called with vnode locked.
5592  */
5593 STATIC void
5594 drain_output(struct vnode *vp, int islocked)
5595 {
5596     int s;
5597
5598     if (!islocked)
5599         ACQUIRE_LOCK(&lk);
5600
5601     splassert(IPL_BIO);
5602
5603     while (vp->v_numoutput) {
5604         vp->v_bioflag |= VBIOWAIT;
5605         s = FREE_LOCK_INTERLOCKED(&lk);
5606         tsleep_nsec(&vp->v_numoutput, PRIBIO + 1, "drain_output", INFSLP);
5607         ACQUIRE_LOCK_INTERLOCKED(&lk, s);
5608     }
5609     if (!islocked)
5610         FREE_LOCK(&lk);
5611 }
5612
5613 /*
5614  * Called whenever a buffer that is being invalidated or reallocated
5615  * contains dependencies. This should only happen if an I/O error has
5616  * occurred. The routine is called with the buffer locked.
5617  */
5618 void
5619 softdep_deallocate_dependencies(struct buf *bp)
5620 {
5621
5622     if ((bp->b_flags & B_ERROR) == 0)
5623         panic("softdep_deallocate_dependencies: dangling deps");
5624     softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
5625     panic("softdep_deallocate_dependencies: unrecovered I/O error");
5626 }
5627
5628 /*
5629  * Function to handle asynchronous write errors in the filesystem.
5630  */
5631 void
5632 softdep_error(char *func, int error)
5633 {
5634
5635     /* XXX should do something better! */
5636     printf("%s: got error %d while accessing filesystem\n", func, error);
5637 }
5638
5639 #ifdef DDB
5640 #include <machine/db_machdep.h>
5641 #include <ddb/db_interface.h>
5642 #include <ddb/db_output.h>
5643
5644 void
5645 softdep_print(struct buf *bp, int full,
5646     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
5647 {
5648     struct worklist *wk;
5649
5650     (*pr)(" deps:\n");
5651     LIST_FOREACH(wk, &bp->b_dep, wk_list)
5652         worklist_print(wk, full, pr);
5653 }
5654
5655 void
5656 worklist_print(struct worklist *wk, int full,
5657     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
5658 {
5659     struct pagedep *pagedep;
5660     struct inodedep *inodedep;
5661     struct newblk *newblk;
5662     struct bmsafemap *bmsafemap;
5663     struct allocdirect *adp;
5664     struct indirdep *indirdep;
5665     struct allocindir *aip;
5666     struct freefrag *freefrag;
5667     struct freeblks *freeblks;
5668     struct freefile *freefile;
5669     struct diradd *dap;
5670     struct mkdir *mkdir;
5671     struct dirrem *dirrem;
5672     struct newdirblk *newdirblk;
5673     char prefix[33];
5674     int i;
5675
5676     for (prefix[i = 2 * MIN(16, full)] = '\0'; i--; prefix[i] = ' ')
5677         ;
5678
5679     (*pr)("%s%s(%p) state %b\n%s", prefix, TYPENAME(wk->wk_type), wk,
5680         wk->wk_state, DEP_BITS, prefix);
5681     switch (wk->wk_type) {
5682     case D_PAGEDEP:
5683         pagedep = WK_PAGEDEP(wk);
5684         (*pr)("mount %p ino %u lbn %lld\n", pagedep->pd_mnt,
5685             pagedep->pd_ino, (long long)pagedep->pd_lbn);
5686         break;
5687     case D_INODEDEP:
5688         inodedep = WK_INODEDEP(wk);
5689         (*pr)("fs %p ino %u nlinkdelta %u dino %p\n"
5690             "%s bp %p savsz %lld\n", inodedep->id_fs,
5691             inodedep->id_ino, inodedep->id_nlinkdelta,
5692             inodedep->id_un.idu_savedino1,
5693             prefix, inodedep->id_buf, inodedep->id_savedsize);
5694         break;
5695     case D_NEWBLK:
5696         newblk = WK_NEWBLK(wk);
5697         (*pr)("fs %p newblk %lld state %d bmsafemap %p\n",
5698             newblk->nb_fs, (long long)newblk->nb_newblkno,
5699             newblk->nb_state, newblk->nb_bmsafemap);
5700         break;
5701     case D_BMSAFEMAP:
5702         bmsafemap = WK_BMSAFEMAP(wk);
5703         (*pr)("buf %p\n", bmsafemap->sm_buf);
5704         break;
5705     case D_ALLOCDIRECT:
5706         adp = WK_ALLOCDIRECT(wk);
5707         (*pr)("lbn %lld newblk %lld oldblk %lld newsize %ld olsize "
5708             "%ld\n%s bp %p inodedep %p freefrag %p\n",
5709             (long long)adp->ad_lbn, (long long)adp->ad_newblkno,
5710             (long long)adp->ad_oldblkno, adp->ad_newsize,
5711             adp->ad_oldsize,
5712             prefix, adp->ad_buf, adp->ad_inodedep, adp->ad_freefrag);
5713         break;
5714     case D_INDIRDEP:
5715         indirdep = WK_INDIRDEP(wk);
5716         (*pr)("savedata %p savebp %p\n", indirdep->ir_saveddata,
5717             indirdep->ir_savebp);
5718         break;
5719     case D_ALLOCINDIR:
5720         aip = WK_ALLOCINDIR(wk);
5721         (*pr)("off %d newblk %lld oldblk %lld freefrag %p\n"
5722             "%s indirdep %p buf %p\n", aip->ai_offset,
5723             (long long)aip->ai_newblkno, (long long)aip->ai_oldblkno,
5724             aip->ai_freefrag, prefix, aip->ai_indirdep, aip->ai_buf);
5725         break;
5726     case D_FREEFRAG:
5727         freefrag = WK_FREEFRAG(wk);
5728         (*pr)("vnode %p mp %p blkno %lld fsize %ld ino %u\n",
5729             freefrag->ff_devvp, freefrag->ff_mnt,
5730             (long long)freefrag->ff_blkno, freefrag->ff_fragsize,
5731             freefrag->ff_inum);
5732         break;
5733     case D_FREEBLKS:
5734         freeblks = WK_FREEBLKS(wk);
5735         (*pr)("previno %u devvp %p mp %p oldsz %lld newsz %lld\n"
5736             "%s chkcnt %d uid %d\n", freeblks->fb_previousinum,
5737             freeblks->fb_devvp, freeblks->fb_mnt, freeblks->fb_oldsize,
5738             freeblks->fb_newsize,
5739             prefix, freeblks->fb_chkcnt, freeblks->fb_uid);
5740         break;
5741     case D_FREEFILE:
5742         freefile = WK_FREEFILE(wk);
5743         (*pr)("mode %x oldino %u vnode %p mp %p\n", freefile->fx_mode,
5744             freefile->fx_oldinum, freefile->fx_devvp, freefile->fx_mnt);
5745         break;
5746     case D_DIRADD:
5747         dap = WK_DIRADD(wk);
5748         (*pr)("off %d ino %u da_un %p\n", dap->da_offset,
5749             dap->da_newinum, dap->da_un.dau_previous);
5750         break;
5751     case D_MKDIR:
5752         mkdir = WK_MKDIR(wk);
5753         (*pr)("diradd %p bp %p\n", mkdir->md_diradd, mkdir->md_buf);
5754         break;
5755     case D_DIRREM:
5756         dirrem = WK_DIRREM(wk);
5757         (*pr)("mp %p ino %u dm_un %p\n", dirrem->dm_mnt,
5758             dirrem->dm_oldinum, dirrem->dm_un.dmu_pagedep);
5759         break;
5760     case D_NEWDIRBLK:
5761         newdirblk = WK_NEWDIRBLK(wk);
5762         (*pr)("pagedep %p\n", newdirblk->db_pagedep);
5763         break;
5764     }
5765 }
5766 #endif