#7 Backport journal fixes
Merged 3 years ago by daandemeyer. Opened 3 years ago by daandemeyer.
centos-sig-hyperscale/ daandemeyer/systemd journal-backports  into  fb-v250.3

@@ -3,8 +3,8 @@ 

  #include "alloc-util.h"

  #include "journal-remote.h"

  

- static int do_rotate(JournaldFile **f, MMapCache *m, bool compress, bool seal) {

-         int r = journald_file_rotate(f, m, compress, UINT64_MAX, seal, NULL);

+ static int do_rotate(ManagedJournalFile **f, MMapCache *m, bool compress, bool seal) {

+         int r = managed_journal_file_rotate(f, m, compress, UINT64_MAX, seal, NULL);

          if (r < 0) {

                  if (*f)

                          log_error_errno(r, "Failed to rotate %s: %m", (*f)->file->path);
@@ -40,7 +40,7 @@ 

  

          if (w->journal) {

                  log_debug("Closing journal file %s.", w->journal->file->path);

-                 journald_file_close(w->journal);

+                 managed_journal_file_close(w->journal);

          }

  

          if (w->server && w->hashmap_key)

@@ -1,13 +1,13 @@ 

  /* SPDX-License-Identifier: LGPL-2.1-or-later */

  #pragma once

  

- #include "journald-file.h"

  #include "journal-importer.h"

+ #include "managed-journal-file.h"

  

  typedef struct RemoteServer RemoteServer;

  

  typedef struct Writer {

-         JournaldFile *journal;

+         ManagedJournalFile *journal;

          JournalMetrics metrics;

  

          MMapCache *mmap;

@@ -14,11 +14,11 @@ 

  #include "errno-util.h"

  #include "escape.h"

  #include "fd-util.h"

- #include "journald-file.h"

  #include "journal-remote-write.h"

  #include "journal-remote.h"

  #include "journald-native.h"

  #include "macro.h"

+ #include "managed-journal-file.h"

  #include "parse-util.h"

  #include "process-util.h"

  #include "socket-util.h"
@@ -61,7 +61,7 @@ 

                  assert_not_reached();

          }

  

-         r = journald_file_open_reliably(filename,

+         r = managed_journal_file_open_reliably(filename,

                                          O_RDWR|O_CREAT, 0640,

                                          s->compress, UINT64_MAX, s->seal,

                                          &w->metrics,

file modified
+1 -1
@@ -399,7 +399,7 @@ 

  

          z = n;

  

-         map_all_fields(p, map_fields_kernel, "_AUDIT_FIELD_", true, iovec, &n, ELEMENTSOF(iovec));

+         map_all_fields(p, map_fields_kernel, "_AUDIT_FIELD_", true, iovec, &n, n + N_IOVEC_AUDIT_FIELDS);

  

          server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, LOG_NOTICE, 0);

  

file modified
+34 -35
@@ -29,7 +29,6 @@ 

  #include "id128-util.h"

  #include "io-util.h"

  #include "journal-authenticate.h"

- #include "journald-file.h"

  #include "journal-internal.h"

  #include "journal-vacuum.h"

  #include "journald-audit.h"
@@ -242,7 +241,7 @@ 

          return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;

  }

  

- static void server_add_acls(JournaldFile *f, uid_t uid) {

+ static void server_add_acls(ManagedJournalFile *f, uid_t uid) {

          assert(f);

  

  #if HAVE_ACL
@@ -264,9 +263,9 @@ 

                  int flags,

                  bool seal,

                  JournalMetrics *metrics,

-                 JournaldFile **ret) {

+                 ManagedJournalFile **ret) {

  

-         _cleanup_(journald_file_closep) JournaldFile *f = NULL;

+         _cleanup_(managed_journal_file_closep) ManagedJournalFile *f = NULL;

          int r;

  

          assert(s);
@@ -274,11 +273,11 @@ 

          assert(ret);

  

          if (reliably)

-                 r = journald_file_open_reliably(fname, flags, 0640, s->compress.enabled,

+                 r = managed_journal_file_open_reliably(fname, flags, 0640, s->compress.enabled,

                                                  s->compress.threshold_bytes, seal, metrics, s->mmap,

                                                  s->deferred_closes, NULL, &f);

          else

-                 r = journald_file_open(-1, fname, flags, 0640, s->compress.enabled,

+                 r = managed_journal_file_open(-1, fname, flags, 0640, s->compress.enabled,

                                         s->compress.threshold_bytes, seal, metrics, s->mmap,

                                         s->deferred_closes, NULL, &f);

  
@@ -389,9 +388,9 @@ 

          return r;

  }

  

- static JournaldFile* find_journal(Server *s, uid_t uid) {

+ static ManagedJournalFile* find_journal(Server *s, uid_t uid) {

          _cleanup_free_ char *p = NULL;

-         JournaldFile *f;

+         ManagedJournalFile *f;

          int r;

  

          assert(s);
@@ -434,7 +433,7 @@ 

          /* Too many open? Then let's close one (or more) */

          while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {

                  assert_se(f = ordered_hashmap_steal_first(s->user_journals));

-                 (void) journald_file_close(f);

+                 (void) managed_journal_file_close(f);

          }

  

          r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
@@ -443,7 +442,7 @@ 

  

          r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);

          if (r < 0) {

-                 (void) journald_file_close(f);

+                 (void) managed_journal_file_close(f);

                  return s->system_journal;

          }

  
@@ -453,7 +452,7 @@ 

  

  static int do_rotate(

                  Server *s,

-                 JournaldFile **f,

+                 ManagedJournalFile **f,

                  const char* name,

                  bool seal,

                  uint32_t uid) {
@@ -464,7 +463,7 @@ 

          if (!*f)

                  return -EINVAL;

  

-         r = journald_file_rotate(f, s->mmap, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);

+         r = managed_journal_file_rotate(f, s->mmap, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);

          if (r < 0) {

                  if (*f)

                          return log_error_errno(r, "Failed to rotate %s: %m", (*f)->file->path);
@@ -477,15 +476,15 @@ 

  }

  

  static void server_process_deferred_closes(Server *s) {

-         JournaldFile *f;

+         ManagedJournalFile *f;

  

          /* Perform any deferred closes which aren't still offlining. */

          SET_FOREACH(f, s->deferred_closes) {

-                 if (journald_file_is_offlining(f))

+                 if (managed_journal_file_is_offlining(f))

                          continue;

  

                  (void) set_remove(s->deferred_closes, f);

-                 (void) journald_file_close(f);

+                 (void) managed_journal_file_close(f);

          }

  }

  
@@ -501,10 +500,10 @@ 

  

          /* And now, let's close some more until we reach the limit again. */

          while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {

-                 JournaldFile *f;

+                 ManagedJournalFile *f;

  

                  assert_se(f = set_steal_first(s->deferred_closes));

-                 journald_file_close(f);

+                 managed_journal_file_close(f);

          }

  }

  
@@ -527,7 +526,7 @@ 

                  _cleanup_close_ int fd = -1;

                  const char *a, *b;

                  struct dirent *de;

-                 JournaldFile *f;

+                 ManagedJournalFile *f;

                  uid_t uid;

  

                  errno = 0;
@@ -575,7 +574,7 @@ 

                  server_vacuum_deferred_closes(s);

  

                  /* Open the file briefly, so that we can archive it */

-                 r = journald_file_open(fd,

+                 r = managed_journal_file_open(fd,

                                         full,

                                         O_RDWR,

                                         0640,
@@ -599,13 +598,13 @@ 

                          continue;

                  }

  

-                 TAKE_FD(fd); /* Donated to journald_file_open() */

+                 TAKE_FD(fd); /* Donated to managed_journal_file_open() */

  

                  r = journal_file_archive(f->file, NULL);

                  if (r < 0)

                          log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);

  

-                 journald_file_initiate_close(f, s->deferred_closes);

+                 managed_journal_file_initiate_close(f, s->deferred_closes);

                  f = NULL;

          }

  
@@ -613,7 +612,7 @@ 

  }

  

  void server_rotate(Server *s) {

-         JournaldFile *f;

+         ManagedJournalFile *f;

          void *k;

          int r;

  
@@ -642,17 +641,17 @@ 

  }

  

  void server_sync(Server *s) {

-         JournaldFile *f;

+         ManagedJournalFile *f;

          int r;

  

          if (s->system_journal) {

-                 r = journald_file_set_offline(s->system_journal, false);

+                 r = managed_journal_file_set_offline(s->system_journal, false);

                  if (r < 0)

                          log_warning_errno(r, "Failed to sync system journal, ignoring: %m");

          }

  

          ORDERED_HASHMAP_FOREACH(f, s->user_journals) {

-                 r = journald_file_set_offline(f, false);

+                 r = managed_journal_file_set_offline(f, false);

                  if (r < 0)

                          log_warning_errno(r, "Failed to sync user journal, ignoring: %m");

          }
@@ -797,7 +796,7 @@ 

  static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {

          bool vacuumed = false, rotate = false;

          struct dual_timestamp ts;

-         JournaldFile *f;

+         ManagedJournalFile *f;

          int r;

  

          assert(s);
@@ -1207,7 +1206,7 @@ 

          if (s->system_journal)

                  journal_file_post_change(s->system_journal->file);

  

-         s->runtime_journal = journald_file_close(s->runtime_journal);

+         s->runtime_journal = managed_journal_file_close(s->runtime_journal);

  

          if (r >= 0)

                  (void) rm_rf(s->runtime_storage.path, REMOVE_ROOT);
@@ -1247,9 +1246,9 @@ 

  

          (void) system_journal_open(s, false, true);

  

-         s->system_journal = journald_file_close(s->system_journal);

-         ordered_hashmap_clear_with_destructor(s->user_journals, journald_file_close);

-         set_clear_with_destructor(s->deferred_closes, journald_file_close);

+         s->system_journal = managed_journal_file_close(s->system_journal);

+         ordered_hashmap_clear_with_destructor(s->user_journals, managed_journal_file_close);

+         set_clear_with_destructor(s->deferred_closes, managed_journal_file_close);

  

          fn = strjoina(s->runtime_directory, "/flushed");

          if (unlink(fn) < 0 && errno != ENOENT)
@@ -2444,7 +2443,7 @@ 

  

  void server_maybe_append_tags(Server *s) {

  #if HAVE_GCRYPT

-         JournaldFile *f;

+         ManagedJournalFile *f;

          usec_t n;

  

          n = now(CLOCK_REALTIME);
@@ -2463,17 +2462,17 @@ 

          free(s->namespace);

          free(s->namespace_field);

  

-         set_free_with_destructor(s->deferred_closes, journald_file_close);

+         set_free_with_destructor(s->deferred_closes, managed_journal_file_close);

  

          while (s->stdout_streams)

                  stdout_stream_free(s->stdout_streams);

  

          client_context_flush_all(s);

  

-         (void) journald_file_close(s->system_journal);

-         (void) journald_file_close(s->runtime_journal);

+         (void) managed_journal_file_close(s->system_journal);

+         (void) managed_journal_file_close(s->runtime_journal);

  

-         ordered_hashmap_free_with_destructor(s->user_journals, journald_file_close);

+         ordered_hashmap_free_with_destructor(s->user_journals, managed_journal_file_close);

  

          varlink_server_unref(s->varlink_server);

  

@@ -10,11 +10,11 @@ 

  

  #include "conf-parser.h"

  #include "hashmap.h"

- #include "journald-file.h"

  #include "journald-context.h"

  #include "journald-rate-limit.h"

  #include "journald-stream.h"

  #include "list.h"

+ #include "managed-journal-file.h"

  #include "prioq.h"

  #include "ratelimit.h"

  #include "time-util.h"
@@ -89,8 +89,8 @@ 

          sd_event_source *watchdog_event_source;

          sd_event_source *idle_event_source;

  

-         JournaldFile *runtime_journal;

-         JournaldFile *system_journal;

+         ManagedJournalFile *runtime_journal;

+         ManagedJournalFile *system_journal;

          OrderedHashmap *user_journals;

  

          uint64_t seqnum;

src/journal/managed-journal-file.c src/journal/journald-file.c
file renamed
+81 -50
@@ -5,10 +5,11 @@ 

  

  #include "chattr-util.h"

  #include "copy.h"

+ #include "errno-util.h"

  #include "fd-util.h"

  #include "format-util.h"

  #include "journal-authenticate.h"

- #include "journald-file.h"

+ #include "managed-journal-file.h"

  #include "path-util.h"

  #include "random-util.h"

  #include "set.h"
@@ -18,12 +19,12 @@ 

  #define PAYLOAD_BUFFER_SIZE (16U * 1024U)

  #define MINIMUM_HOLE_SIZE (1U * 1024U * 1024U / 2U)

  

- static int journald_file_truncate(JournalFile *f) {

+ static int managed_journal_file_truncate(JournalFile *f) {

          uint64_t p;

          int r;

  

          /* truncate excess from the end of archives */

-         r = journal_file_tail_end(f, &p);

+         r = journal_file_tail_end_by_pread(f, &p);

          if (r < 0)

                  return log_debug_errno(r, "Failed to determine end of tail object: %m");

  
@@ -31,12 +32,12 @@ 

          f->header->arena_size = htole64(p - le64toh(f->header->header_size));

  

          if (ftruncate(f->fd, p) < 0)

-                 log_debug_errno(errno, "Failed to truncate %s: %m", f->path);

+                 return log_debug_errno(errno, "Failed to truncate %s: %m", f->path);

  

-         return 0;

+         return journal_file_fstat(f);

  }

  

- static int journald_file_entry_array_punch_hole(JournalFile *f, uint64_t p, uint64_t n_entries) {

+ static int managed_journal_file_entry_array_punch_hole(JournalFile *f, uint64_t p, uint64_t n_entries) {

          Object o;

          uint64_t offset, sz, n_items = 0, n_unused;

          int r;
@@ -45,7 +46,7 @@ 

                  return 0;

  

          for (uint64_t q = p; q != 0; q = le64toh(o.entry_array.next_entry_array_offset)) {

-                 r = journal_file_read_object(f, OBJECT_ENTRY_ARRAY, q, &o);

+                 r = journal_file_read_object_header(f, OBJECT_ENTRY_ARRAY, q, &o);

                  if (r < 0)

                          return r;

  
@@ -72,19 +73,44 @@ 

          if (sz < MINIMUM_HOLE_SIZE)

                  return 0;

  

-         if (fallocate(f->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, sz) < 0)

+         if (p == le64toh(f->header->tail_object_offset) && !f->seal) {

+                 ssize_t n;

+ 

+                 o.object.size = htole64(offset - p);

+ 

+                 n = pwrite(f->fd, &o, sizeof(EntryArrayObject), p);

+                 if (n < 0)

+                         return log_debug_errno(errno, "Failed to modify entry array object size: %m");

+                 if ((size_t) n != sizeof(EntryArrayObject))

+                         return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short pwrite() while modifying entry array object size.");

+ 

+                 f->header->arena_size = htole64(ALIGN64(offset) - le64toh(f->header->header_size));

+ 

+                 if (ftruncate(f->fd, ALIGN64(offset)) < 0)

+                         return log_debug_errno(errno, "Failed to truncate %s: %m", f->path);

+ 

+                 return 0;

+         }

+ 

+         if (fallocate(f->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, sz) < 0) {

+                 if (ERRNO_IS_NOT_SUPPORTED(errno)) {

+                         log_debug("Hole punching not supported by backing file system, skipping.");

+                         return -EOPNOTSUPP; /* Make recognizable */

+                 }

+ 

                  return log_debug_errno(errno, "Failed to punch hole in entry array of %s: %m", f->path);

+         }

  

          return 0;

  }

  

- static int journald_file_punch_holes(JournalFile *f) {

+ static int managed_journal_file_punch_holes(JournalFile *f) {

          HashItem items[PAYLOAD_BUFFER_SIZE / sizeof(HashItem)];

          uint64_t p, sz;

          ssize_t n = SSIZE_MAX;

          int r;

  

-         r = journald_file_entry_array_punch_hole(

+         r = managed_journal_file_entry_array_punch_hole(

                  f, le64toh(f->header->entry_array_offset), le64toh(f->header->n_entries));

          if (r < 0)

                  return r;
@@ -93,9 +119,10 @@ 

          sz = le64toh(f->header->data_hash_table_size);

  

          for (uint64_t i = p; i < p + sz && n > 0; i += n) {

-                 n = pread(f->fd, items, MIN(sizeof(items), p + sz - i), i);

+                 size_t m = MIN(sizeof(items), p + sz - i);

+                 n = pread(f->fd, items, m, i);

                  if (n < 0)

-                         return n;

+                         return log_debug_errno(errno, "Failed to read hash table items: %m");

  

                  /* Let's ignore any partial hash items by rounding down to the nearest multiple of HashItem. */

                  n -= n % sizeof(HashItem);
@@ -106,7 +133,7 @@ 

                          for (uint64_t q = le64toh(items[j].head_hash_offset); q != 0;

                               q = le64toh(o.data.next_hash_offset)) {

  

-                                 r = journal_file_read_object(f, OBJECT_DATA, q, &o);

+                                 r = journal_file_read_object_header(f, OBJECT_DATA, q, &o);

                                  if (r < 0) {

                                          log_debug_errno(r, "Invalid data object: %m, ignoring");

                                          break;
@@ -115,8 +142,12 @@ 

                                  if (le64toh(o.data.n_entries) == 0)

                                          continue;

  

-                                 (void) journald_file_entry_array_punch_hole(

-                                         f, le64toh(o.data.entry_array_offset), le64toh(o.data.n_entries) - 1);

+                                 r = managed_journal_file_entry_array_punch_hole(

+                                                 f, le64toh(o.data.entry_array_offset), le64toh(o.data.n_entries) - 1);

+                                 if (r == -EOPNOTSUPP)

+                                         return -EOPNOTSUPP;

+ 

+                                 /* Ignore other errors */

                          }

                  }

          }
@@ -127,7 +158,7 @@ 

  /* This may be called from a separate thread to prevent blocking the caller for the duration of fsync().

   * As a result we use atomic operations on f->offline_state for inter-thread communications with

   * journal_file_set_offline() and journal_file_set_online(). */

- static void journald_file_set_offline_internal(JournaldFile *f) {

+ static void managed_journal_file_set_offline_internal(ManagedJournalFile *f) {

          int r;

  

          assert(f);
@@ -153,8 +184,8 @@ 

  

                  case OFFLINE_SYNCING:

                          if (f->file->archive) {

-                                 (void) journald_file_truncate(f->file);

-                                 (void) journald_file_punch_holes(f->file);

+                                 (void) managed_journal_file_truncate(f->file);

+                                 (void) managed_journal_file_punch_holes(f->file);

                          }

  

                          (void) fsync(f->file->fd);
@@ -179,7 +210,7 @@ 

  

                                  log_debug_errno(r, "Failed to re-enable copy-on-write for %s: %m, rewriting file", f->file->path);

  

-                                 r = copy_file_atomic(f->file->path, f->file->path, f->file->mode, 0, FS_NOCOW_FL, COPY_REPLACE | COPY_FSYNC);

+                                 r = copy_file_atomic(FORMAT_PROC_FD_PATH(f->file->fd), f->file->path, f->file->mode, 0, FS_NOCOW_FL, COPY_REPLACE | COPY_FSYNC | COPY_HOLES);

                                  if (r < 0) {

                                          log_debug_errno(r, "Failed to rewrite %s: %m", f->file->path);

                                          continue;
@@ -202,18 +233,18 @@ 

          }

  }

  

- static void * journald_file_set_offline_thread(void *arg) {

-         JournaldFile *f = arg;

+ static void * managed_journal_file_set_offline_thread(void *arg) {

+         ManagedJournalFile *f = arg;

  

          (void) pthread_setname_np(pthread_self(), "journal-offline");

  

-         journald_file_set_offline_internal(f);

+         managed_journal_file_set_offline_internal(f);

  

          return NULL;

  }

  

  /* Trigger a restart if the offline thread is mid-flight in a restartable state. */

- static bool journald_file_set_offline_try_restart(JournaldFile *f) {

+ static bool managed_journal_file_set_offline_try_restart(ManagedJournalFile *f) {

          for (;;) {

                  switch (f->file->offline_state) {

                  case OFFLINE_AGAIN_FROM_SYNCING:
@@ -251,7 +282,7 @@ 

   * and joined, or if none exists the offline is simply performed in this

   * context without involving another thread.

   */

- int journald_file_set_offline(JournaldFile *f, bool wait) {

+ int managed_journal_file_set_offline(ManagedJournalFile *f, bool wait) {

          int target_state;

          bool restarted;

          int r;
@@ -270,11 +301,11 @@ 

           * we must also join any potentially lingering offline thread when already in

           * the desired offline state.

           */

-         if (!journald_file_is_offlining(f) && f->file->header->state == target_state)

+         if (!managed_journal_file_is_offlining(f) && f->file->header->state == target_state)

                  return journal_file_set_offline_thread_join(f->file);

  

          /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */

-         restarted = journald_file_set_offline_try_restart(f);

+         restarted = managed_journal_file_set_offline_try_restart(f);

          if ((restarted && wait) || !restarted) {

                  r = journal_file_set_offline_thread_join(f->file);

                  if (r < 0)
@@ -288,7 +319,7 @@ 

          f->file->offline_state = OFFLINE_SYNCING;

  

          if (wait) /* Without using a thread if waiting. */

-                 journald_file_set_offline_internal(f);

+                 managed_journal_file_set_offline_internal(f);

          else {

                  sigset_t ss, saved_ss;

                  int k;
@@ -302,7 +333,7 @@ 

                  if (r > 0)

                          return -r;

  

-                 r = pthread_create(&f->file->offline_thread, NULL, journald_file_set_offline_thread, f);

+                 r = pthread_create(&f->file->offline_thread, NULL, managed_journal_file_set_offline_thread, f);

  

                  k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);

                  if (r > 0) {
@@ -316,7 +347,7 @@ 

          return 0;

  }

  

- bool journald_file_is_offlining(JournaldFile *f) {

+ bool managed_journal_file_is_offlining(ManagedJournalFile *f) {

          assert(f);

  

          __sync_synchronize();
@@ -327,7 +358,7 @@ 

          return true;

  }

  

- JournaldFile* journald_file_close(JournaldFile *f) {

+ ManagedJournalFile* managed_journal_file_close(ManagedJournalFile *f) {

          if (!f)

                  return NULL;

  
@@ -349,14 +380,14 @@ 

                  sd_event_source_disable_unref(f->file->post_change_timer);

          }

  

-         journald_file_set_offline(f, true);

+         managed_journal_file_set_offline(f, true);

  

          journal_file_close(f->file);

  

          return mfree(f);

  }

  

- int journald_file_open(

+ int managed_journal_file_open(

                  int fd,

                  const char *fname,

                  int flags,
@@ -367,14 +398,14 @@ 

                  JournalMetrics *metrics,

                  MMapCache *mmap_cache,

                  Set *deferred_closes,

-                 JournaldFile *template,

-                 JournaldFile **ret) {

-         _cleanup_free_ JournaldFile *f = NULL;

+                 ManagedJournalFile *template,

+                 ManagedJournalFile **ret) {

+         _cleanup_free_ ManagedJournalFile *f = NULL;

          int r;

  

-         set_clear_with_destructor(deferred_closes, journald_file_close);

+         set_clear_with_destructor(deferred_closes, managed_journal_file_close);

  

-         f = new0(JournaldFile, 1);

+         f = new0(ManagedJournalFile, 1);

          if (!f)

                  return -ENOMEM;

  
@@ -389,7 +420,7 @@ 

  }

  

  

- JournaldFile* journald_file_initiate_close(JournaldFile *f, Set *deferred_closes) {

+ ManagedJournalFile* managed_journal_file_initiate_close(ManagedJournalFile *f, Set *deferred_closes) {

          int r;

  

          assert(f);
@@ -399,16 +430,16 @@ 

                  if (r < 0)

                          log_debug_errno(r, "Failed to add file to deferred close set, closing immediately.");

                  else {

-                         (void) journald_file_set_offline(f, false);

+                         (void) managed_journal_file_set_offline(f, false);

                          return NULL;

                  }

          }

  

-         return journald_file_close(f);

+         return managed_journal_file_close(f);

  }

  

- int journald_file_rotate(

-                 JournaldFile **f,

+ int managed_journal_file_rotate(

+                 ManagedJournalFile **f,

                  MMapCache *mmap_cache,

                  bool compress,

                  uint64_t compress_threshold_bytes,
@@ -416,7 +447,7 @@ 

                  Set *deferred_closes) {

  

          _cleanup_free_ char *path = NULL;

-         JournaldFile *new_file = NULL;

+         ManagedJournalFile *new_file = NULL;

          int r;

  

          assert(f);
@@ -426,7 +457,7 @@ 

          if (r < 0)

                  return r;

  

-         r = journald_file_open(

+         r = managed_journal_file_open(

                          -1,

                          path,

                          (*f)->file->flags,
@@ -440,13 +471,13 @@ 

                          *f,              /* template */

                          &new_file);

  

-         journald_file_initiate_close(*f, deferred_closes);

+         managed_journal_file_initiate_close(*f, deferred_closes);

          *f = new_file;

  

          return r;

  }

  

- int journald_file_open_reliably(

+ int managed_journal_file_open_reliably(

                  const char *fname,

                  int flags,

                  mode_t mode,
@@ -456,12 +487,12 @@ 

                  JournalMetrics *metrics,

                  MMapCache *mmap_cache,

                  Set *deferred_closes,

-                 JournaldFile *template,

-                 JournaldFile **ret) {

+                 ManagedJournalFile *template,

+                 ManagedJournalFile **ret) {

  

          int r;

  

-         r = journald_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics,

+         r = managed_journal_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics,

                                 mmap_cache, deferred_closes, template, ret);

          if (!IN_SET(r,

                      -EBADMSG,           /* Corrupted */
@@ -491,6 +522,6 @@ 

          if (r < 0)

                  return r;

  

-         return journald_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics,

+         return managed_journal_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics,

                                    mmap_cache, deferred_closes, template, ret);

  }

src/journal/managed-journal-file.h src/journal/journald-file.h
file renamed
+13 -13
@@ -5,9 +5,9 @@ 

  

  typedef struct {

          JournalFile *file;

- } JournaldFile;

+ } ManagedJournalFile;

  

- int journald_file_open(

+ int managed_journal_file_open(

                  int fd,

                  const char *fname,

                  int flags,
@@ -18,15 +18,15 @@ 

                  JournalMetrics *metrics,

                  MMapCache *mmap_cache,

                  Set *deferred_closes,

-                 JournaldFile *template,

-                 JournaldFile **ret);

+                 ManagedJournalFile *template,

+                 ManagedJournalFile **ret);

  

- int journald_file_set_offline(JournaldFile *f, bool wait);

- bool journald_file_is_offlining(JournaldFile *f);

- JournaldFile* journald_file_close(JournaldFile *f);

- DEFINE_TRIVIAL_CLEANUP_FUNC(JournaldFile*, journald_file_close);

+ int managed_journal_file_set_offline(ManagedJournalFile *f, bool wait);

+ bool managed_journal_file_is_offlining(ManagedJournalFile *f);

+ ManagedJournalFile* managed_journal_file_close(ManagedJournalFile *f);

+ DEFINE_TRIVIAL_CLEANUP_FUNC(ManagedJournalFile*, managed_journal_file_close);

  

- int journald_file_open_reliably(

+ int managed_journal_file_open_reliably(

                  const char *fname,

                  int flags,

                  mode_t mode,
@@ -36,8 +36,8 @@ 

                  JournalMetrics *metrics,

                  MMapCache *mmap_cache,

                  Set *deferred_closes,

-                 JournaldFile *template,

-                 JournaldFile **ret);

+                 ManagedJournalFile *template,

+                 ManagedJournalFile **ret);

  

- JournaldFile* journald_file_initiate_close(JournaldFile *f, Set *deferred_closes);

- int journald_file_rotate(JournaldFile **f, MMapCache *mmap_cache, bool compress, uint64_t compress_threshold_bytes, bool seal, Set *deferred_closes);

+ ManagedJournalFile* managed_journal_file_initiate_close(ManagedJournalFile *f, Set *deferred_closes);

+ int managed_journal_file_rotate(ManagedJournalFile **f, MMapCache *mmap_cache, bool compress, uint64_t compress_threshold_bytes, bool seal, Set *deferred_closes);

file modified
+2 -2
@@ -7,8 +7,6 @@ 

          journald-console.h

          journald-context.c

          journald-context.h

-         journald-file.c

-         journald-file.h

          journald-kmsg.c

          journald-kmsg.h

          journald-native.c
@@ -23,6 +21,8 @@ 

          journald-syslog.h

          journald-wall.c

          journald-wall.h

+         managed-journal-file.c

+         managed-journal-file.h

  '''.split())

  

  sources += custom_target(

@@ -7,9 +7,9 @@ 

  

  #include "alloc-util.h"

  #include "chattr-util.h"

- #include "journald-file.h"

  #include "journal-internal.h"

  #include "macro.h"

+ #include "managed-journal-file.h"

  #include "path-util.h"

  #include "string-util.h"

  
@@ -17,7 +17,7 @@ 

          _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;

          _cleanup_free_ char *fn = NULL;

          char dn[] = "/var/tmp/test-journal-flush.XXXXXX";

-         JournaldFile *new_journal = NULL;

+         ManagedJournalFile *new_journal = NULL;

          sd_journal *j = NULL;

          unsigned n = 0;

          int r;
@@ -29,7 +29,7 @@ 

  

          fn = path_join(dn, "test.journal");

  

-         r = journald_file_open(-1, fn, O_CREAT|O_RDWR, 0644, false, 0, false, NULL, m, NULL, NULL, &new_journal);

+         r = managed_journal_file_open(-1, fn, O_CREAT|O_RDWR, 0644, false, 0, false, NULL, m, NULL, NULL, &new_journal);

          assert_se(r >= 0);

  

          if (argc > 1)
@@ -66,7 +66,7 @@ 

  

          sd_journal_close(j);

  

-         (void) journald_file_close(new_journal);

+         (void) managed_journal_file_close(new_journal);

  

          unlink(fn);

          assert_se(rmdir(dn) == 0);

@@ -8,9 +8,9 @@ 

  #include "alloc-util.h"

  #include "chattr-util.h"

  #include "io-util.h"

- #include "journald-file.h"

  #include "journal-vacuum.h"

  #include "log.h"

+ #include "managed-journal-file.h"

  #include "parse-util.h"

  #include "rm-rf.h"

  #include "tests.h"
@@ -33,22 +33,22 @@ 

                          log_assert_errno(#expr, -_r_, PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__); \

          } while (false)

  

- static JournaldFile *test_open(const char *name) {

+ static ManagedJournalFile *test_open(const char *name) {

          _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;

-         JournaldFile *f;

+         ManagedJournalFile *f;

  

          m = mmap_cache_new();

          assert_se(m != NULL);

  

-         assert_ret(journald_file_open(-1, name, O_RDWR|O_CREAT, 0644, true, UINT64_MAX, false, NULL, m, NULL, NULL, &f));

+         assert_ret(managed_journal_file_open(-1, name, O_RDWR|O_CREAT, 0644, true, UINT64_MAX, false, NULL, m, NULL, NULL, &f));

          return f;

  }

  

- static void test_close(JournaldFile *f) {

-         (void) journald_file_close(f);

+ static void test_close(ManagedJournalFile *f) {

+         (void) managed_journal_file_close(f);

  }

  

- static void append_number(JournaldFile *f, int n, uint64_t *seqnum) {

+ static void append_number(ManagedJournalFile *f, int n, uint64_t *seqnum) {

          char *p;

          dual_timestamp ts;

          static dual_timestamp previous_ts = {};
@@ -113,7 +113,7 @@ 

  }

  

  static void setup_sequential(void) {

-         JournaldFile *one, *two;

+         ManagedJournalFile *one, *two;

          one = test_open("one.journal");

          two = test_open("two.journal");

          append_number(one, 1, NULL);
@@ -125,7 +125,7 @@ 

  }

  

  static void setup_interleaved(void) {

-         JournaldFile *one, *two;

+         ManagedJournalFile *one, *two;

          one = test_open("one.journal");

          two = test_open("two.journal");

          append_number(one, 1, NULL);
@@ -205,7 +205,7 @@ 

  

          _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;

          char t[] = "/var/tmp/journal-seq-XXXXXX";

-         JournaldFile *one, *two;

+         ManagedJournalFile *one, *two;

          uint64_t seqnum = 0;

          sd_id128_t seqnum_id;

  
@@ -214,7 +214,7 @@ 

  

          mkdtemp_chdir_chattr(t);

  

-         assert_se(journald_file_open(-1, "one.journal", O_RDWR|O_CREAT, 0644,

+         assert_se(managed_journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, 0644,

                                       true, UINT64_MAX, false, NULL, m, NULL, NULL, &one) == 0);

  

          append_number(one, 1, &seqnum);
@@ -231,7 +231,7 @@ 

  

          memcpy(&seqnum_id, &one->file->header->seqnum_id, sizeof(sd_id128_t));

  

-         assert_se(journald_file_open(-1, "two.journal", O_RDWR|O_CREAT, 0644,

+         assert_se(managed_journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, 0644,

                                       true, UINT64_MAX, false, NULL, m, NULL, one, &two) == 0);

  

          assert_se(two->file->header->state == STATE_ONLINE);
@@ -262,7 +262,7 @@ 

          /* restart server */

          seqnum = 0;

  

-         assert_se(journald_file_open(-1, "two.journal", O_RDWR, 0,

+         assert_se(managed_journal_file_open(-1, "two.journal", O_RDWR, 0,

                                       true, UINT64_MAX, false, NULL, m, NULL, NULL, &two) == 0);

  

          assert_se(sd_id128_equal(two->file->header->seqnum_id, seqnum_id));
@@ -290,7 +290,7 @@ 

  int main(int argc, char *argv[]) {

          test_setup_logging(LOG_DEBUG);

  

-         /* journald_file_open requires a valid machine id */

+         /* managed_journal_file_open requires a valid machine id */

          if (access("/etc/machine-id", F_OK) != 0)

                  return log_tests_skipped("/etc/machine-id not found");

  

@@ -8,10 +8,10 @@ 

  #include "alloc-util.h"

  #include "chattr-util.h"

  #include "io-util.h"

- #include "journald-file.h"

  #include "journal-internal.h"

  #include "log.h"

  #include "macro.h"

+ #include "managed-journal-file.h"

  #include "parse-util.h"

  #include "rm-rf.h"

  #include "tests.h"
@@ -61,7 +61,7 @@ 

  

  static void run_test(void) {

          _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;

-         JournaldFile *one, *two, *three;

+         ManagedJournalFile *one, *two, *three;

          char t[] = "/var/tmp/journal-stream-XXXXXX";

          unsigned i;

          _cleanup_(sd_journal_closep) sd_journal *j = NULL;
@@ -77,9 +77,9 @@ 

          assert_se(chdir(t) >= 0);

          (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);

  

-         assert_se(journald_file_open(-1, "one.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, false, NULL, m, NULL, NULL, &one) == 0);

-         assert_se(journald_file_open(-1, "two.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, false, NULL, m, NULL, NULL, &two) == 0);

-         assert_se(journald_file_open(-1, "three.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, false, NULL, m, NULL, NULL, &three) == 0);

+         assert_se(managed_journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, false, NULL, m, NULL, NULL, &one) == 0);

+         assert_se(managed_journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, false, NULL, m, NULL, NULL, &two) == 0);

+         assert_se(managed_journal_file_open(-1, "three.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, false, NULL, m, NULL, NULL, &three) == 0);

  

          for (i = 0; i < N_ENTRIES; i++) {

                  char *p, *q;
@@ -116,9 +116,9 @@ 

                  free(q);

          }

  

-         (void) journald_file_close(one);

-         (void) journald_file_close(two);

-         (void) journald_file_close(three);

+         (void) managed_journal_file_close(one);

+         (void) managed_journal_file_close(two);

+         (void) managed_journal_file_close(three);

  

          assert_se(sd_journal_open_directory(&j, t, 0) >= 0);

  
@@ -178,7 +178,7 @@ 

  

  int main(int argc, char *argv[]) {

  

-         /* journald_file_open requires a valid machine id */

+         /* managed_journal_file_open requires a valid machine id */

          if (access("/etc/machine-id", F_OK) != 0)

                  return log_tests_skipped("/etc/machine-id not found");

  

@@ -7,9 +7,9 @@ 

  #include "chattr-util.h"

  #include "fd-util.h"

  #include "io-util.h"

- #include "journald-file.h"

  #include "journal-verify.h"

  #include "log.h"

+ #include "managed-journal-file.h"

  #include "mmap-cache.h"

  #include "rm-rf.h"

  #include "terminal-util.h"
@@ -61,7 +61,7 @@ 

          char t[] = "/var/tmp/journal-XXXXXX";

          unsigned n;

          JournalFile *f;

-         JournaldFile *df;

+         ManagedJournalFile *df;

          const char *verification_key = argv[1];

          usec_t from = 0, to = 0, total = 0;

          struct stat st;
@@ -70,7 +70,7 @@ 

          m = mmap_cache_new();

          assert_se(m != NULL);

  

-         /* journald_file_open requires a valid machine id */

+         /* managed_journal_file_open requires a valid machine id */

          if (access("/etc/machine-id", F_OK) != 0)

                  return log_tests_skipped("/etc/machine-id not found");

  
@@ -82,7 +82,7 @@ 

  

          log_info("Generating...");

  

-         assert_se(journald_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, !!verification_key, NULL, m, NULL, NULL, &df) == 0);

+         assert_se(managed_journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, !!verification_key, NULL, m, NULL, NULL, &df) == 0);

  

          for (n = 0; n < N_ENTRIES; n++) {

                  struct iovec iovec;
@@ -100,7 +100,7 @@ 

                  free(test);

          }

  

-         (void) journald_file_close(df);

+         (void) managed_journal_file_close(df);

  

          log_info("Verifying...");

  

file modified
+27 -27
@@ -6,9 +6,9 @@ 

  #include "chattr-util.h"

  #include "io-util.h"

  #include "journal-authenticate.h"

- #include "journald-file.h"

  #include "journal-vacuum.h"

  #include "log.h"

+ #include "managed-journal-file.h"

  #include "rm-rf.h"

  #include "tests.h"

  
@@ -26,10 +26,10 @@ 

  static void test_non_empty(void) {

          _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;

          dual_timestamp ts;

-         JournaldFile *f;

+         ManagedJournalFile *f;

          struct iovec iovec;

          static const char test[] = "TEST1=1", test2[] = "TEST2=2";

-         Object *o;

+         Object *o, *d;

          uint64_t p;

          sd_id128_t fake_boot_id;

          char t[] = "/var/tmp/journal-XXXXXX";
@@ -41,7 +41,7 @@ 

  

          mkdtemp_chdir_chattr(t);

  

-         assert_se(journald_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, true, NULL, m, NULL, NULL, &f) == 0);

+         assert_se(managed_journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, true, NULL, m, NULL, NULL, &f) == 0);

  

          assert_se(dual_timestamp_get(&ts));

          assert_se(sd_id128_randomize(&fake_boot_id) == 0);
@@ -75,21 +75,21 @@ 

          assert_se(journal_file_next_entry(f->file, 0, DIRECTION_DOWN, &o, &p) == 1);

          assert_se(le64toh(o->entry.seqnum) == 1);

  

-         assert_se(journal_file_find_data_object(f->file, test, strlen(test), NULL, &p) == 1);

-         assert_se(journal_file_next_entry_for_data(f->file, p, DIRECTION_DOWN, &o, NULL) == 1);

+         assert_se(journal_file_find_data_object(f->file, test, strlen(test), &d, NULL) == 1);

+         assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_DOWN, &o, NULL) == 1);

          assert_se(le64toh(o->entry.seqnum) == 1);

  

-         assert_se(journal_file_next_entry_for_data(f->file, p, DIRECTION_UP, &o, NULL) == 1);

+         assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_UP, &o, NULL) == 1);

          assert_se(le64toh(o->entry.seqnum) == 3);

  

-         assert_se(journal_file_find_data_object(f->file, test2, strlen(test2), NULL, &p) == 1);

-         assert_se(journal_file_next_entry_for_data(f->file, p, DIRECTION_UP, &o, NULL) == 1);

+         assert_se(journal_file_find_data_object(f->file, test2, strlen(test2), &d, NULL) == 1);

+         assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_UP, &o, NULL) == 1);

          assert_se(le64toh(o->entry.seqnum) == 2);

  

-         assert_se(journal_file_next_entry_for_data(f->file, p, DIRECTION_DOWN, &o, NULL) == 1);

+         assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_DOWN, &o, NULL) == 1);

          assert_se(le64toh(o->entry.seqnum) == 2);

  

-         assert_se(journal_file_find_data_object(f->file, "quux", 4, NULL, &p) == 0);

+         assert_se(journal_file_find_data_object(f->file, "quux", 4, &d, NULL) == 0);

  

          assert_se(journal_file_move_to_entry_by_seqnum(f->file, 1, DIRECTION_DOWN, &o, NULL) == 1);

          assert_se(le64toh(o->entry.seqnum) == 1);
@@ -102,10 +102,10 @@ 

  

          assert_se(journal_file_move_to_entry_by_seqnum(f->file, 10, DIRECTION_DOWN, &o, NULL) == 0);

  

-         journald_file_rotate(&f, m, true, UINT64_MAX, true, NULL);

-         journald_file_rotate(&f, m, true, UINT64_MAX, true, NULL);

+         managed_journal_file_rotate(&f, m, true, UINT64_MAX, true, NULL);

+         managed_journal_file_rotate(&f, m, true, UINT64_MAX, true, NULL);

  

-         (void) journald_file_close(f);

+         (void) managed_journal_file_close(f);

  

          log_info("Done...");

  
@@ -122,7 +122,7 @@ 

  

  static void test_empty(void) {

          _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;

-         JournaldFile *f1, *f2, *f3, *f4;

+         ManagedJournalFile *f1, *f2, *f3, *f4;

          char t[] = "/var/tmp/journal-XXXXXX";

  

          test_setup_logging(LOG_DEBUG);
@@ -132,10 +132,10 @@ 

  

          mkdtemp_chdir_chattr(t);

  

-         assert_se(journald_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, false, UINT64_MAX, false, NULL, m, NULL, NULL, &f1) == 0);

-         assert_se(journald_file_open(-1, "test-compress.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, false, NULL, m, NULL, NULL, &f2) == 0);

-         assert_se(journald_file_open(-1, "test-seal.journal", O_RDWR|O_CREAT, 0666, false, UINT64_MAX, true, NULL, m, NULL, NULL, &f3) == 0);

-         assert_se(journald_file_open(-1, "test-seal-compress.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, true, NULL, m, NULL, NULL, &f4) == 0);

+         assert_se(managed_journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, false, UINT64_MAX, false, NULL, m, NULL, NULL, &f1) == 0);

+         assert_se(managed_journal_file_open(-1, "test-compress.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, false, NULL, m, NULL, NULL, &f2) == 0);

+         assert_se(managed_journal_file_open(-1, "test-seal.journal", O_RDWR|O_CREAT, 0666, false, UINT64_MAX, true, NULL, m, NULL, NULL, &f3) == 0);

+         assert_se(managed_journal_file_open(-1, "test-seal-compress.journal", O_RDWR|O_CREAT, 0666, true, UINT64_MAX, true, NULL, m, NULL, NULL, &f4) == 0);

  

          journal_file_print_header(f1->file);

          puts("");
@@ -156,17 +156,17 @@ 

                  assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);

          }

  

-         (void) journald_file_close(f1);

-         (void) journald_file_close(f2);

-         (void) journald_file_close(f3);

-         (void) journald_file_close(f4);

+         (void) managed_journal_file_close(f1);

+         (void) managed_journal_file_close(f2);

+         (void) managed_journal_file_close(f3);

+         (void) managed_journal_file_close(f4);

  }

  

  #if HAVE_COMPRESSION

  static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) {

          _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;

          dual_timestamp ts;

-         JournaldFile *f;

+         ManagedJournalFile *f;

          struct iovec iovec;

          Object *o;

          uint64_t p;
@@ -184,7 +184,7 @@ 

  

          mkdtemp_chdir_chattr(t);

  

-         assert_se(journald_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, compress_threshold, true, NULL, m, NULL, NULL, &f) == 0);

+         assert_se(managed_journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, compress_threshold, true, NULL, m, NULL, NULL, &f) == 0);

  

          dual_timestamp_get(&ts);

  
@@ -211,7 +211,7 @@ 

  

          is_compressed = (o->object.flags & OBJECT_COMPRESSION_MASK) != 0;

  

-         (void) journald_file_close(f);

+         (void) managed_journal_file_close(f);

  

          log_info("Done...");

  
@@ -254,7 +254,7 @@ 

  

          test_setup_logging(LOG_INFO);

  

-         /* journald_file_open requires a valid machine id */

+         /* managed_journal_file_open requires a valid machine id */

          if (access("/etc/machine-id", F_OK) != 0)

                  return log_tests_skipped("/etc/machine-id not found");

  

@@ -248,18 +248,18 @@ 

          case OBJECT_DATA:

                  /* All but hash and payload are mutable */

                  gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));

-                 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));

+                 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));

                  break;

  

          case OBJECT_FIELD:

                  /* Same here */

                  gcry_md_write(f->hmac, &o->field.hash, sizeof(o->field.hash));

-                 gcry_md_write(f->hmac, o->field.payload, le64toh(o->object.size) - offsetof(FieldObject, payload));

+                 gcry_md_write(f->hmac, o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));

                  break;

  

          case OBJECT_ENTRY:

                  /* All */

-                 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));

+                 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(Object, entry.seqnum));

                  break;

  

          case OBJECT_FIELD_HASH_TABLE:

@@ -91,8 +91,7 @@ 

  #  pragma GCC diagnostic ignored "-Waddress-of-packed-member"

  #endif

  

- int journal_file_tail_end(JournalFile *f, uint64_t *ret_offset) {

-         Object tail;

+ int journal_file_tail_end_by_pread(JournalFile *f, uint64_t *ret_offset) {

          uint64_t p;

          int r;

  
@@ -100,13 +99,17 @@ 

          assert(f->header);

          assert(ret_offset);

  

+         /* Same as journal_file_tail_end_by_mmap() below, but operates with pread() to avoid the mmap cache

+          * (and thus is thread safe) */

+ 

          p = le64toh(f->header->tail_object_offset);

          if (p == 0)

                  p = le64toh(f->header->header_size);

          else {

+                 Object tail;

                  uint64_t sz;

  

-                 r = journal_file_read_object(f, OBJECT_UNUSED, p, &tail);

+                 r = journal_file_read_object_header(f, OBJECT_UNUSED, p, &tail);

                  if (r < 0)

                          return r;

  
@@ -126,6 +129,43 @@ 

          return 0;

  }

  

+ int journal_file_tail_end_by_mmap(JournalFile *f, uint64_t *ret_offset) {

+         uint64_t p;

+         int r;

+ 

+         assert(f);

+         assert(f->header);

+         assert(ret_offset);

+ 

+         /* Same as journal_file_tail_end_by_pread() above, but operates with the usual mmap logic.

+          *

+          * Stores in *ret_offset the offset right behind the tail object: the header's header_size if its

+          * tail_object_offset is zero, otherwise tail_object_offset plus the tail object's size after

+          * ALIGN64(). Returns 0 on success, -EBADMSG if aligning the size or adding it to the offset would

+          * overflow, or the negative error returned by journal_file_move_to_object(). */

+ 

+         p = le64toh(f->header->tail_object_offset);

+         if (p == 0)

+                 p = le64toh(f->header->header_size);

+         else {

+                 Object *tail;

+                 uint64_t sz;

+ 

+                 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);

+                 if (r < 0)

+                         return r;

+ 

+                 sz = le64toh(READ_NOW(tail->object.size));

+                 if (sz > UINT64_MAX - sizeof(uint64_t) + 1)

+                         return -EBADMSG;

+ 

+                 sz = ALIGN64(sz);

+                 if (p > UINT64_MAX - sz)

+                         return -EBADMSG;

+ 

+                 p += sz;

+         }

+ 

+         *ret_offset = p;

+ 

+         return 0;

+ }

+ 

  int journal_file_set_offline_thread_join(JournalFile *f) {

          int r;

  
@@ -618,10 +658,10 @@ 

                                                 le64toh(o->data.n_entries),

                                                 offset);

  

-                 if (le64toh(o->object.size) <= offsetof(DataObject, payload))

+                 if (le64toh(o->object.size) <= offsetof(Object, data.payload))

                          return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                                 "Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,

-                                                offsetof(DataObject, payload),

+                                                offsetof(Object, data.payload),

                                                 le64toh(o->object.size),

                                                 offset);

  
@@ -640,10 +680,10 @@ 

                  break;

  

          case OBJECT_FIELD:

-                 if (le64toh(o->object.size) <= offsetof(FieldObject, payload))

+                 if (le64toh(o->object.size) <= offsetof(Object, field.payload))

                          return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                                 "Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,

-                                                offsetof(FieldObject, payload),

+                                                offsetof(Object, field.payload),

                                                 le64toh(o->object.size),

                                                 offset);

  
@@ -660,18 +700,18 @@ 

                  uint64_t sz;

  

                  sz = le64toh(READ_NOW(o->object.size));

-                 if (sz < offsetof(EntryObject, items) ||

-                     (sz - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)

+                 if (sz < offsetof(Object, entry.items) ||

+                     (sz - offsetof(Object, entry.items)) % sizeof(EntryItem) != 0)

                          return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                                 "Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,

-                                                offsetof(EntryObject, items),

+                                                offsetof(Object, entry.items),

                                                 sz,

                                                 offset);

  

-                 if ((sz - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)

+                 if ((sz - offsetof(Object, entry.items)) / sizeof(EntryItem) <= 0)

                          return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                                 "Invalid number items in entry: %" PRIu64 ": %" PRIu64,

-                                                (sz - offsetof(EntryObject, items)) / sizeof(EntryItem),

+                                                (sz - offsetof(Object, entry.items)) / sizeof(EntryItem),

                                                 offset);

  

                  if (le64toh(o->entry.seqnum) <= 0)
@@ -700,9 +740,9 @@ 

                  uint64_t sz;

  

                  sz = le64toh(READ_NOW(o->object.size));

-                 if (sz < offsetof(HashTableObject, items) ||

-                     (sz - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||

-                     (sz - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)

+                 if (sz < offsetof(Object, hash_table.items) ||

+                     (sz - offsetof(Object, hash_table.items)) % sizeof(HashItem) != 0 ||

+                     (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem) <= 0)

                          return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                                 "Invalid %s hash table size: %" PRIu64 ": %" PRIu64,

                                                 o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
@@ -716,9 +756,9 @@ 

                  uint64_t sz;

  

                  sz = le64toh(READ_NOW(o->object.size));

-                 if (sz < offsetof(EntryArrayObject, items) ||

-                     (sz - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||

-                     (sz - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)

+                 if (sz < offsetof(Object, entry_array.items) ||

+                     (sz - offsetof(Object, entry_array.items)) % sizeof(le64_t) != 0 ||

+                     (sz - offsetof(Object, entry_array.items)) / sizeof(le64_t) <= 0)

                          return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                                 "Invalid object entry array size: %" PRIu64 ": %" PRIu64,

                                                 sz,
@@ -758,7 +798,6 @@ 

          uint64_t s;

  

          assert(f);

-         assert(ret);

  

          /* Objects may only be located at multiple of 64 bit */

          if (!VALID64(offset))
@@ -813,17 +852,19 @@ 

          if (r < 0)

                  return r;

  

-         *ret = o;

+         if (ret)

+                 *ret = o;

+ 

          return 0;

  }

  

- int journal_file_read_object(JournalFile *f, ObjectType type, uint64_t offset, Object *ret) {

-         int r;

-         Object o;

+ int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t offset, Object *ret) {

          uint64_t s;

+         ssize_t n;

+         Object o;

+         int r;

  

          assert(f);

-         assert(ret);

  

          /* Objects may only be located at multiple of 64 bit */

          if (!VALID64(offset))
@@ -838,17 +879,22 @@ 

                                         offset);

  

          /* This will likely read too much data but it avoids having to call pread() twice. */

-         r = pread(f->fd, &o, sizeof(Object), offset);

-         if (r < 0)

-                 return r;

+         n = pread(f->fd, &o, sizeof(o), offset);

+         if (n < 0)

+                 return log_debug_errno(errno, "Failed to read journal file at offset: %" PRIu64,

+                                        offset);

  

-         s = le64toh(o.object.size);

+         if ((size_t) n < sizeof(o.object))

+                 return log_debug_errno(SYNTHETIC_ERRNO(EIO),

+                                        "Failed to read short object at offset: %" PRIu64,

+                                        offset);

  

+         s = le64toh(o.object.size);

          if (s == 0)

                  return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                         "Attempt to read uninitialized object: %" PRIu64,

                                         offset);

-         if (s < sizeof(ObjectHeader))

+         if (s < sizeof(o.object))

                  return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                         "Attempt to read overly short object: %" PRIu64,

                                         offset);
@@ -863,6 +909,11 @@ 

                                         "Attempt to read truncated object: %" PRIu64,

                                         offset);

  

+         if ((size_t) n < minimum_header_size(&o))

+                 return log_debug_errno(SYNTHETIC_ERRNO(EIO),

+                                        "Short read while reading object: %" PRIu64,

+                                        offset);

+ 

          if (type > OBJECT_UNUSED && o.object.type != type)

                  return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),

                                         "Attempt to read object of unexpected type: %" PRIu64,
@@ -872,7 +923,9 @@ 

          if (r < 0)

                  return r;

  

-         *ret = o;

+         if (ret)

+                 *ret = o;

+ 

          return 0;

  }

  
@@ -928,7 +981,7 @@ 

          if (r < 0)

                  return r;

  

-         r = journal_file_tail_end(f, &p);

+         r = journal_file_tail_end_by_mmap(f, &p);

          if (r < 0)

                  return r;

  
@@ -1453,19 +1506,11 @@ 

  

          hash = journal_file_hash_data(f, field, size);

  

-         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);

+         r = journal_file_find_field_object_with_hash(f, field, size, hash, ret, ret_offset);

          if (r < 0)

                  return r;

-         if (r > 0) {

- 

-                 if (ret)

-                         *ret = o;

- 

-                 if (ret_offset)

-                         *ret_offset = p;

- 

+         if (r > 0)

                  return 0;

-         }

  

          osize = offsetof(Object, field.payload) + size;

          r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
@@ -1479,20 +1524,20 @@ 

          if (r < 0)

                  return r;

  

-         /* The linking might have altered the window, so let's

-          * refresh our pointer */

-         r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);

-         if (r < 0)

-                 return r;

+         /* The linking might have altered the window, so let's only pass the offset to hmac which will

+          * move to the object again if needed. */

  

  #if HAVE_GCRYPT

-         r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);

+         r = journal_file_hmac_put_object(f, OBJECT_FIELD, NULL, p);

          if (r < 0)

                  return r;

  #endif

  

-         if (ret)

-                 *ret = o;

+         if (ret) {

+                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, ret);

+                 if (r < 0)

+                         return r;

+         }

  

          if (ret_offset)

                  *ret_offset = p;
@@ -1517,19 +1562,11 @@ 

  

          hash = journal_file_hash_data(f, data, size);

  

-         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);

+         r = journal_file_find_data_object_with_hash(f, data, size, hash, ret, ret_offset);

          if (r < 0)

                  return r;

-         if (r > 0) {

- 

-                 if (ret)

-                         *ret = o;

- 

-                 if (ret_offset)

-                         *ret_offset = p;

- 

+         if (r > 0)

                  return 0;

-         }

  

          eq = memchr(data, '=', size);

          if (!eq)
@@ -1567,17 +1604,16 @@ 

          if (r < 0)

                  return r;

  

- #if HAVE_GCRYPT

-         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);

+         /* The linking might have altered the window, so let's refresh our pointer. */

+         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);

          if (r < 0)

                  return r;

- #endif

  

-         /* The linking might have altered the window, so let's

-          * refresh our pointer */

-         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);

+ #if HAVE_GCRYPT

+         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);

          if (r < 0)

                  return r;

+ #endif

  

          /* Create field object ... */

          r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
@@ -1801,12 +1837,20 @@ 

          /* Link up the items */

          n = journal_file_entry_n_items(o);

          for (uint64_t i = 0; i < n; i++) {

-                 r = journal_file_link_entry_item(f, o, offset, i);

-                 if (r < 0)

-                         return r;

+                 int k;

+ 

+                 /* If we fail to link an entry item because we can't allocate a new entry array, don't fail

+                  * immediately but try to link the other entry items since it might still be possible to link

+                  * those if they don't require a new entry array to be allocated. */

+ 

+                 k = journal_file_link_entry_item(f, o, offset, i);

+                 if (k == -E2BIG)

+                         r = k;

+                 else if (k < 0)

+                         return k;

          }

  

-         return 0;

+         return r;

  }

  

  static int journal_file_append_entry_internal(
@@ -2119,6 +2163,41 @@ 

          return 1;

  }

  

+ static int bump_entry_array(JournalFile *f, Object *o, uint64_t offset, uint64_t first, direction_t direction, uint64_t *ret) {

+         uint64_t p, q = 0;

+         int r;

+ 

+         assert(f);

+         assert(offset);

+         assert(ret);

+ 

+         /* Stores in *ret the offset of the entry array adjacent to the one at 'offset', within the chain

+          * starting at 'first'. For DIRECTION_DOWN that is the next_entry_array_offset of 'o' (which must

+          * be the entry array object at 'offset'); for any other direction it is the predecessor found by

+          * walking the chain from 'first' ('o' may be NULL then; *ret is 0 if 'offset' is 'first' itself).

+          * Returns 0 on success and a negative errno-style error on failure. */

+ 

+         if (direction == DIRECTION_DOWN) {

+                 /* Report the offset via *ret like the upwards path does. Returning it directly would

+                  * truncate the 64-bit offset to int, could look like an error code to callers, and would

+                  * leave *ret untouched. */

+                 assert(o);

+ 

+                 *ret = le64toh(o->entry_array.next_entry_array_offset);

+ 

+                 return 0;

+         }

+ 

+         /* Entry array chains are a singly linked list, so to find the previous array in the chain, we have

+          * to start iterating from the top. */

+ 

+         p = first;

+ 

+         while (p > 0 && p != offset) {

+                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, p, &o);

+                 if (r < 0)

+                         return r;

+ 

+                 q = p;

+                 p = le64toh(o->entry_array.next_entry_array_offset);

+         }

+ 

+         /* If we can't find the previous entry array in the entry array chain, we're likely dealing with a

+          * corrupted journal file. */

+         if (p == 0)

+                 return -EBADMSG;

+ 

+         *ret = q;

+ 

+         return 0;

+ }

+ 

  static int generic_array_get(

                  JournalFile *f,

                  uint64_t first,
@@ -2126,7 +2205,7 @@ 

                  direction_t direction,

                  Object **ret, uint64_t *ret_offset) {

  

-         Object *o, *e;

+         Object *o;

          uint64_t p = 0, a, t = 0, k;

          int r;

          ChainCacheItem *ci;
@@ -2145,6 +2224,24 @@ 

  

          while (a > 0) {

                  r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);

+                 if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)) {

+                         /* If there's corruption and we're going downwards, let's pretend we reached the

+                          * final entry in the entry array chain. */

+ 

+                         if (direction == DIRECTION_DOWN)

+                                 return 0;

+ 

+                         /* If there's corruption and we're going upwards, move back to the previous entry

+                          * array and start iterating entries from there. */

+ 

+                         r = bump_entry_array(f, NULL, a, first, DIRECTION_UP, &a);

+                         if (r < 0)

+                                 return r;

+ 

+                         i = UINT64_MAX;

+ 

+                         break;

+                 }

                  if (r < 0)

                          return r;

  
@@ -2178,9 +2275,16 @@ 

                  do {

                          p = le64toh(o->entry_array.items[i]);

  

-                         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &e);

-                         if (r >= 0)

-                                 goto found;

+                         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, ret);

+                         if (r >= 0) {

+                                 /* Let's cache this item for the next invocation */

+                                 chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);

+ 

+                                 if (ret_offset)

+                                         *ret_offset = p;

+ 

+                                 return 1;

+                         }

                          if (!IN_SET(r, -EADDRNOTAVAIL, -EBADMSG))

                                  return r;

  
@@ -2189,24 +2293,15 @@ 

                          log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i);

                  } while (bump_array_index(&i, direction, k) > 0);

  

+                 r = bump_entry_array(f, o, a, first, direction, &a);

+                 if (r < 0)

+                         return r;

+ 

                  t += k;

-                 a = le64toh(o->entry_array.next_entry_array_offset);

                  i = UINT64_MAX;

          }

  

          return 0;

- 

- found:

-         /* Let's cache this item for the next invocation */

-         chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);

- 

-         if (ret)

-                 *ret = e;

- 

-         if (ret_offset)

-                 *ret_offset = p;

- 

-         return 1;

  }

  

  static int generic_array_get_plus_one(
@@ -2217,21 +2312,17 @@ 

                  direction_t direction,

                  Object **ret, uint64_t *ret_offset) {

  

-         Object *o;

          int r;

  

          assert(f);

  

          if (i == 0) {

-                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);

+                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret);

                  if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG))

                          return generic_array_get(f, first, 0, direction, ret, ret_offset);

                  if (r < 0)

                          return r;

  

-                 if (ret)

-                         *ret = o;

- 

                  if (ret_offset)

                          *ret_offset = extra;

  
@@ -2260,7 +2351,7 @@ 

  

          uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = UINT64_MAX;

          bool subtract_one = false;

-         Object *o, *array = NULL;

+         Object *array = NULL;

          int r;

          ChainCacheItem *ci;

  
@@ -2448,12 +2539,11 @@ 

          else

                  p = le64toh(array->entry_array.items[i]);

  

-         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);

-         if (r < 0)

-                 return r;

- 

-         if (ret)

-                 *ret = o;

+         if (ret) {

+                 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, ret);

+                 if (r < 0)

+                         return r;

+         }

  

          if (ret_offset)

                  *ret_offset = p;
@@ -2478,7 +2568,6 @@ 

  

          int r;

          bool step_back = false;

-         Object *o;

  

          assert(f);

          assert(test_object);
@@ -2521,12 +2610,11 @@ 

          return r;

  

  found:

-         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);

-         if (r < 0)

-                 return r;

- 

-         if (ret)

-                 *ret = o;

+         if (ret) {

+                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret);

+                 if (r < 0)

+                         return r;

+         }

  

          if (ret_offset)

                  *ret_offset = extra;
@@ -2549,6 +2637,26 @@ 

                  return TEST_RIGHT;

  }

  

+ /* Bisects the main entry array chain recorded in the file header, using test_object_offset as the

+  * comparison callback and 'p' as the needle. The result of generic_array_bisect() is passed through

+  * unchanged, as are 'ret' and 'ret_offset'. */

+ int journal_file_move_to_entry_by_offset(

+                 JournalFile *f,

+                 uint64_t p,

+                 direction_t direction,

+                 Object **ret,

+                 uint64_t *ret_offset) {

+ 

+         uint64_t chain_head, total;

+ 

+         assert(f);

+         assert(f->header);

+ 

+         /* Both values come straight from the header and are converted from little endian here. */

+         chain_head = le64toh(f->header->entry_array_offset);

+         total = le64toh(f->header->n_entries);

+ 

+         return generic_array_bisect(f, chain_head, total, p, test_object_offset, direction, ret, ret_offset, NULL);

+ }

+ 

  static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {

          uint64_t sq;

          Object *o;
@@ -2828,19 +2936,16 @@ 

  

  int journal_file_next_entry_for_data(

                  JournalFile *f,

-                 uint64_t data_offset,

+                 Object *d,

                  direction_t direction,

                  Object **ret, uint64_t *ret_offset) {

  

          uint64_t i, n, ofs;

-         Object *d;

          int r;

  

          assert(f);

- 

-         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);

-         if (r < 0)

-                 return r;

+         assert(d);

+         assert(d->object.type == OBJECT_DATA);

  

          n = le64toh(READ_NOW(d->data.n_entries));

          if (n <= 0)
@@ -2865,19 +2970,14 @@ 

  

  int journal_file_move_to_entry_by_offset_for_data(

                  JournalFile *f,

-                 uint64_t data_offset,

+                 Object *d,

                  uint64_t p,

                  direction_t direction,

                  Object **ret, uint64_t *ret_offset) {

  

-         int r;

-         Object *d;

- 

          assert(f);

- 

-         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);

-         if (r < 0)

-                 return r;

+         assert(d);

+         assert(d->object.type == OBJECT_DATA);

  

          return generic_array_bisect_plus_one(

                          f,
@@ -2892,17 +2992,24 @@ 

  

  int journal_file_move_to_entry_by_monotonic_for_data(

                  JournalFile *f,

-                 uint64_t data_offset,

+                 Object *d,

                  sd_id128_t boot_id,

                  uint64_t monotonic,

                  direction_t direction,

                  Object **ret, uint64_t *ret_offset) {

  

-         Object *o, *d;

+         Object *o;

          int r;

-         uint64_t b, z;

+         uint64_t b, z, entry_offset, entry_array_offset, n_entries;

  

          assert(f);

+         assert(d);

+         assert(d->object.type == OBJECT_DATA);

+ 

+         /* Save all the required data before the data object gets invalidated. */

+         entry_offset = le64toh(READ_NOW(d->data.entry_offset));

+         entry_array_offset = le64toh(READ_NOW(d->data.entry_array_offset));

+         n_entries = le64toh(READ_NOW(d->data.n_entries));

  

          /* First, seek by time */

          r = find_data_object_by_boot_id(f, boot_id, &o, &b);
@@ -2925,18 +3032,17 @@ 

          /* And now, continue seeking until we find an entry that

           * exists in both bisection arrays */

  

+         r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);

+         if (r < 0)

+                 return r;

+ 

          for (;;) {

-                 Object *qo;

                  uint64_t p, q;

  

-                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);

-                 if (r < 0)

-                         return r;

- 

                  r = generic_array_bisect_plus_one(f,

-                                                   le64toh(d->data.entry_offset),

-                                                   le64toh(d->data.entry_array_offset),

-                                                   le64toh(d->data.n_entries),

+                                                   entry_offset,

+                                                   entry_array_offset,

+                                                   n_entries,

                                                    z,

                                                    test_object_offset,

                                                    direction,
@@ -2944,10 +3050,6 @@ 

                  if (r <= 0)

                          return r;

  

-                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);

-                 if (r < 0)

-                         return r;

- 

                  r = generic_array_bisect_plus_one(f,

                                                    le64toh(o->data.entry_offset),

                                                    le64toh(o->data.entry_array_offset),
@@ -2955,14 +3057,18 @@ 

                                                    p,

                                                    test_object_offset,

                                                    direction,

-                                                   &qo, &q, NULL);

+                                                   NULL, &q, NULL);

  

                  if (r <= 0)

                          return r;

  

                  if (p == q) {

-                         if (ret)

-                                 *ret = qo;

+                         if (ret) {

+                                 r = journal_file_move_to_object(f, OBJECT_ENTRY, q, ret);

+                                 if (r < 0)

+                                         return r;

+                         }

+ 

                          if (ret_offset)

                                  *ret_offset = q;

  
@@ -2975,19 +3081,14 @@ 

  

  int journal_file_move_to_entry_by_seqnum_for_data(

                  JournalFile *f,

-                 uint64_t data_offset,

+                 Object *d,

                  uint64_t seqnum,

                  direction_t direction,

                  Object **ret, uint64_t *ret_offset) {

  

-         Object *d;

-         int r;

- 

          assert(f);

- 

-         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);

-         if (r < 0)

-                 return r;

+         assert(d);

+         assert(d->object.type == OBJECT_DATA);

  

          return generic_array_bisect_plus_one(

                          f,
@@ -3002,19 +3103,14 @@ 

  

  int journal_file_move_to_entry_by_realtime_for_data(

                  JournalFile *f,

-                 uint64_t data_offset,

+                 Object *d,

                  uint64_t realtime,

                  direction_t direction,

                  Object **ret, uint64_t *ret_offset) {

  

-         Object *d;

-         int r;

- 

          assert(f);

- 

-         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);

-         if (r < 0)

-                 return r;

+         assert(d);

+         assert(d->object.type == OBJECT_DATA);

  

          return generic_array_bisect_plus_one(

                          f,
@@ -3274,7 +3370,7 @@ 

          r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");

          if (r < 0) {

                  if (r != -ENXIO)

-                         log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");

+                         log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring: %m");

                  f->keyed_hash = true;

          } else

                  f->keyed_hash = r;
@@ -3576,21 +3672,16 @@ 

  

          for (uint64_t i = 0; i < n; i++) {

                  uint64_t l, h;

-                 le64_t le_hash;

                  size_t t;

                  void *data;

                  Object *u;

  

                  q = le64toh(o->entry.items[i].object_offset);

-                 le_hash = o->entry.items[i].hash;

  

                  r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);

                  if (r < 0)

                          return r;

  

-                 if (le_hash != o->data.hash)

-                         return -EBADMSG;

- 

                  l = le64toh(READ_NOW(o->object.size));

                  if (l < offsetof(Object, data.payload))

                          return -EBADMSG;

@@ -185,15 +185,16 @@ 

          FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH)

  

  int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret);

- int journal_file_read_object(JournalFile *f, ObjectType type, uint64_t offset, Object *ret);

+ int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t offset, Object *ret);

  

- int journal_file_tail_end(JournalFile *f, uint64_t *ret_offset);

+ int journal_file_tail_end_by_pread(JournalFile *f, uint64_t *ret_offset);

+ int journal_file_tail_end_by_mmap(JournalFile *f, uint64_t *ret_offset);

  

  uint64_t journal_file_entry_n_items(Object *o) _pure_;

  uint64_t journal_file_entry_array_n_items(Object *o) _pure_;

  uint64_t journal_file_hash_table_n_items(Object *o) _pure_;

  

- int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset);

+ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *ret_offset);

  int journal_file_append_entry(

                  JournalFile *f,

                  const dual_timestamp *ts,
@@ -201,29 +202,30 @@ 

                  const struct iovec iovec[], unsigned n_iovec,

                  uint64_t *seqno,

                  Object **ret,

-                 uint64_t *offset);

+                 uint64_t *ret_offset);

  

- int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset);

- int journal_file_find_data_object_with_hash(JournalFile *f, const void *data, uint64_t size, uint64_t hash, Object **ret, uint64_t *offset);

+ int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *ret_offset);

+ int journal_file_find_data_object_with_hash(JournalFile *f, const void *data, uint64_t size, uint64_t hash, Object **ret, uint64_t *ret_offset);

  

- int journal_file_find_field_object(JournalFile *f, const void *field, uint64_t size, Object **ret, uint64_t *offset);

- int journal_file_find_field_object_with_hash(JournalFile *f, const void *field, uint64_t size, uint64_t hash, Object **ret, uint64_t *offset);

+ int journal_file_find_field_object(JournalFile *f, const void *field, uint64_t size, Object **ret, uint64_t *ret_offset);

+ int journal_file_find_field_object_with_hash(JournalFile *f, const void *field, uint64_t size, uint64_t hash, Object **ret, uint64_t *ret_offset);

  

  void journal_file_reset_location(JournalFile *f);

  void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset);

  int journal_file_compare_locations(JournalFile *af, JournalFile *bf);

- int journal_file_next_entry(JournalFile *f, uint64_t p, direction_t direction, Object **ret, uint64_t *offset);

+ int journal_file_next_entry(JournalFile *f, uint64_t p, direction_t direction, Object **ret, uint64_t *ret_offset);

  

- int journal_file_next_entry_for_data(JournalFile *f, uint64_t data_offset, direction_t direction, Object **ret, uint64_t *offset);

+ int journal_file_next_entry_for_data(JournalFile *f, Object *d, direction_t direction, Object **ret, uint64_t *ret_offset);

  

- int journal_file_move_to_entry_by_seqnum(JournalFile *f, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset);

- int journal_file_move_to_entry_by_realtime(JournalFile *f, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset);

- int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *offset);

+ int journal_file_move_to_entry_by_offset(JournalFile *f, uint64_t p, direction_t direction, Object **ret, uint64_t *ret_offset);

+ int journal_file_move_to_entry_by_seqnum(JournalFile *f, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *ret_offset);

+ int journal_file_move_to_entry_by_realtime(JournalFile *f, uint64_t realtime, direction_t direction, Object **ret, uint64_t *ret_offset);

+ int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *ret_offset);

  

- int journal_file_move_to_entry_by_offset_for_data(JournalFile *f, uint64_t data_offset, uint64_t p, direction_t direction, Object **ret, uint64_t *offset);

- int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, uint64_t data_offset, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset);

- int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, uint64_t data_offset, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset);

- int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, uint64_t data_offset, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *offset);

+ int journal_file_move_to_entry_by_offset_for_data(JournalFile *f, Object *d, uint64_t p, direction_t direction, Object **ret, uint64_t *ret_offset);

+ int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, Object *d, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *ret_offset);

+ int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, Object *d, uint64_t realtime, direction_t direction, Object **ret, uint64_t *ret_offset);

+ int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, Object *d, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *ret_offset);

  

  int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p);

  

@@ -137,8 +137,6 @@ 

  }

  

  static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o) {

-         uint64_t i;

- 

          assert(f);

          assert(offset);

          assert(o);
@@ -169,9 +167,9 @@ 

                          return -EBADMSG;

                  }

  

-                 if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0) {

+                 if (le64toh(o->object.size) - offsetof(Object, data.payload) <= 0) {

                          error(offset, "Bad object size (<= %zu): %"PRIu64,

-                               offsetof(DataObject, payload),

+                               offsetof(Object, data.payload),

                                le64toh(o->object.size));

                          return -EBADMSG;

                  }
@@ -207,10 +205,10 @@ 

                  uint64_t h1, h2;

                  int r;

  

-                 if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0) {

+                 if (le64toh(o->object.size) - offsetof(Object, field.payload) <= 0) {

                          error(offset,

                                "Bad field size (<= %zu): %"PRIu64,

-                               offsetof(FieldObject, payload),

+                               offsetof(Object, field.payload),

                                le64toh(o->object.size));

                          return -EBADMSG;

                  }
@@ -239,18 +237,18 @@ 

          }

  

          case OBJECT_ENTRY:

-                 if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0) {

+                 if ((le64toh(o->object.size) - offsetof(Object, entry.items)) % sizeof(EntryItem) != 0) {

                          error(offset,

                                "Bad entry size (<= %zu): %"PRIu64,

-                               offsetof(EntryObject, items),

+                               offsetof(Object, entry.items),

                                le64toh(o->object.size));

                          return -EBADMSG;

                  }

  

-                 if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0) {

+                 if ((le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem) <= 0) {

                          error(offset,

                                "Invalid number items in entry: %"PRIu64,

-                               (le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem));

+                               (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem));

                          return -EBADMSG;

                  }

  
@@ -275,7 +273,7 @@ 

                          return -EBADMSG;

                  }

  

-                 for (i = 0; i < journal_file_entry_n_items(o); i++) {

+                 for (uint64_t i = 0; i < journal_file_entry_n_items(o); i++) {

                          if (le64toh(o->entry.items[i].object_offset) == 0 ||

                              !VALID64(le64toh(o->entry.items[i].object_offset))) {

                                  error(offset,
@@ -290,8 +288,8 @@ 

  

          case OBJECT_DATA_HASH_TABLE:

          case OBJECT_FIELD_HASH_TABLE:

-                 if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||

-                     (le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0) {

+                 if ((le64toh(o->object.size) - offsetof(Object, hash_table.items)) % sizeof(HashItem) != 0 ||

+                     (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem) <= 0) {

                          error(offset,

                                "Invalid %s size: %"PRIu64,

                                journal_object_type_to_string(o->object.type),
@@ -299,7 +297,7 @@ 

                          return -EBADMSG;

                  }

  

-                 for (i = 0; i < journal_file_hash_table_n_items(o); i++) {

+                 for (uint64_t i = 0; i < journal_file_hash_table_n_items(o); i++) {

                          if (o->hash_table.items[i].head_hash_offset != 0 &&

                              !VALID64(le64toh(o->hash_table.items[i].head_hash_offset))) {

                                  error(offset,
@@ -334,8 +332,8 @@ 

                  break;

  

          case OBJECT_ENTRY_ARRAY:

-                 if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||

-                     (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0) {

+                 if ((le64toh(o->object.size) - offsetof(Object, entry_array.items)) % sizeof(le64_t) != 0 ||

+                     (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(le64_t) <= 0) {

                          error(offset,

                                "Invalid object entry array size: %"PRIu64,

                                le64toh(o->object.size));
@@ -349,7 +347,7 @@ 

                          return -EBADMSG;

                  }

  

-                 for (i = 0; i < journal_file_entry_array_n_items(o); i++)

+                 for (uint64_t i = 0; i < journal_file_entry_array_n_items(o); i++)

                          if (le64toh(o->entry_array.items[i]) != 0 &&

                              !VALID64(le64toh(o->entry_array.items[i]))) {

                                  error(offset,
@@ -422,92 +420,6 @@ 

          return 0;

  }

  

- static int entry_points_to_data(

-                 JournalFile *f,

-                 MMapFileDescriptor *cache_entry_fd,

-                 uint64_t n_entries,

-                 uint64_t entry_p,

-                 uint64_t data_p) {

- 

-         int r;

-         uint64_t i, n, a;

-         Object *o;

-         bool found = false;

- 

-         assert(f);

-         assert(cache_entry_fd);

- 

-         if (!contains_uint64(cache_entry_fd, n_entries, entry_p)) {

-                 error(data_p, "Data object references invalid entry at "OFSfmt, entry_p);

-                 return -EBADMSG;

-         }

- 

-         r = journal_file_move_to_object(f, OBJECT_ENTRY, entry_p, &o);

-         if (r < 0)

-                 return r;

- 

-         n = journal_file_entry_n_items(o);

-         for (i = 0; i < n; i++)

-                 if (le64toh(o->entry.items[i].object_offset) == data_p) {

-                         found = true;

-                         break;

-                 }

- 

-         if (!found) {

-                 error(entry_p, "Data object at "OFSfmt" not referenced by linked entry", data_p);

-                 return -EBADMSG;

-         }

- 

-         /* Check if this entry is also in main entry array. Since the

-          * main entry array has already been verified we can rely on

-          * its consistency. */

- 

-         i = 0;

-         n = le64toh(f->header->n_entries);

-         a = le64toh(f->header->entry_array_offset);

- 

-         while (i < n) {

-                 uint64_t m, u;

- 

-                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);

-                 if (r < 0)

-                         return r;

- 

-                 m = journal_file_entry_array_n_items(o);

-                 u = MIN(n - i, m);

- 

-                 if (entry_p <= le64toh(o->entry_array.items[u-1])) {

-                         uint64_t x, y, z;

- 

-                         x = 0;

-                         y = u;

- 

-                         while (x < y) {

-                                 z = (x + y) / 2;

- 

-                                 if (le64toh(o->entry_array.items[z]) == entry_p)

-                                         return 0;

- 

-                                 if (x + 1 >= y)

-                                         break;

- 

-                                 if (entry_p < le64toh(o->entry_array.items[z]))

-                                         y = z;

-                                 else

-                                         x = z;

-                         }

- 

-                         error(entry_p, "Entry object doesn't exist in main entry array");

-                         return -EBADMSG;

-                 }

- 

-                 i += u;

-                 a = le64toh(o->entry_array.next_entry_array_offset);

-         }

- 

-         return 0;

- }

- 

  static int verify_data(

                  JournalFile *f,

                  Object *o, uint64_t p,
@@ -538,9 +450,18 @@ 

          assert(o->data.entry_offset);

  

          last = q = le64toh(o->data.entry_offset);

-         r = entry_points_to_data(f, cache_entry_fd, n_entries, q, p);

+         if (!contains_uint64(cache_entry_fd, n_entries, q)) {

+                 error(p, "Data object references invalid entry at "OFSfmt, q);

+                 return -EBADMSG;

+         }

+ 

+         r = journal_file_move_to_entry_by_offset(f, q, DIRECTION_DOWN, NULL, NULL);

          if (r < 0)

                  return r;

+         if (r == 0) {

+                 error(q, "Entry object doesn't exist in the main entry array");

+                 return -EBADMSG;

+         }

  

          i = 1;

          while (i < n) {
@@ -576,9 +497,18 @@ 

                          }

                          last = q;

  

-                         r = entry_points_to_data(f, cache_entry_fd, n_entries, q, p);

+                         if (!contains_uint64(cache_entry_fd, n_entries, q)) {

+                                 error(p, "Data object references invalid entry at "OFSfmt, q);

+                                 return -EBADMSG;

+                         }

+ 

+                         r = journal_file_move_to_entry_by_offset(f, q, DIRECTION_DOWN, NULL, NULL);

                          if (r < 0)

                                  return r;

+                         if (r == 0) {

+                                 error(q, "Entry object doesn't exist in the main entry array");

+                                 return -EBADMSG;

+                         }

  

                          /* Pointer might have moved, reposition */

                          r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
@@ -703,7 +633,8 @@ 

  static int verify_entry(

                  JournalFile *f,

                  Object *o, uint64_t p,

-                 MMapFileDescriptor *cache_data_fd, uint64_t n_data) {

+                 MMapFileDescriptor *cache_data_fd, uint64_t n_data,

+                 bool last) {

  

          uint64_t i, n;

          int r;
@@ -714,11 +645,10 @@ 

  

          n = journal_file_entry_n_items(o);

          for (i = 0; i < n; i++) {

-                 uint64_t q, h;

+                 uint64_t q;

                  Object *u;

  

                  q = le64toh(o->entry.items[i].object_offset);

-                 h = le64toh(o->entry.items[i].hash);

  

                  if (!contains_uint64(cache_data_fd, n_data, q)) {

                          error(p, "Invalid data object of entry");
@@ -729,16 +659,23 @@ 

                  if (r < 0)

                          return r;

  

-                 if (le64toh(u->data.hash) != h) {

-                         error(p, "Hash mismatch for data object of entry");

+                 r = data_object_in_hash_table(f, le64toh(u->data.hash), q);

+                 if (r < 0)

+                         return r;

+                 if (r == 0) {

+                         error(p, "Data object missing from hash table");

                          return -EBADMSG;

                  }

  

-                 r = data_object_in_hash_table(f, h, q);

+                 r = journal_file_move_to_entry_by_offset_for_data(f, u, p, DIRECTION_DOWN, NULL, NULL);

                  if (r < 0)

                          return r;

-                 if (r == 0) {

-                         error(p, "Data object missing from hash table");

+ 

+                 /* The last entry object has a very high chance of not being referenced as journal files

+                  * almost always run out of space during linking of entry items when trying to add a new

+                  * entry array so let's not error in that scenario. */

+                 if (r == 0 && !last) {

+                         error(p, "Entry object not referenced by linked data object at "OFSfmt, q);

                          return -EBADMSG;

                  }

          }
@@ -812,7 +749,7 @@ 

                          if (r < 0)

                                  return r;

  

-                         r = verify_entry(f, o, p, cache_data_fd, n_data);

+                         r = verify_entry(f, o, p, cache_data_fd, n_data, /*last=*/ i + 1 == n);

                          if (r < 0)

                                  return r;

  
@@ -842,21 +779,21 @@ 

                  return -EBADMSG;

          }

  

-         if (header_offset != p + offsetof(HashTableObject, items)) {

+         if (header_offset != p + offsetof(Object, hash_table.items)) {

                  error(p,

                        "Header offset for %s invalid (%" PRIu64 " != %" PRIu64 ")",

                        journal_object_type_to_string(o->object.type),

                        header_offset,

-                       p + offsetof(HashTableObject, items));

+                       p + offsetof(Object, hash_table.items));

                  return -EBADMSG;

          }

  

-         if (header_size != le64toh(o->object.size) - offsetof(HashTableObject, items)) {

+         if (header_size != le64toh(o->object.size) - offsetof(Object, hash_table.items)) {

                  error(p,

                        "Header size for %s invalid (%" PRIu64 " != %" PRIu64 ")",

                        journal_object_type_to_string(o->object.type),

                        header_size,

-                       le64toh(o->object.size) - offsetof(HashTableObject, items));

+                       le64toh(o->object.size) - offsetof(Object, hash_table.items));

                  return -EBADMSG;

          }

  

@@ -501,7 +501,8 @@ 

          assert(f);

  

          if (m->type == MATCH_DISCRETE) {

-                 uint64_t dp, hash;

+                 Object *d;

+                 uint64_t hash;

  

                  /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise

                   * we can use what we pre-calculated. */
@@ -510,11 +511,11 @@ 

                  else

                          hash = m->hash;

  

-                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);

+                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, NULL);

                  if (r <= 0)

                          return r;

  

-                 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);

+                 return journal_file_move_to_entry_by_offset_for_data(f, d, after_offset, direction, ret, offset);

  

          } else if (m->type == MATCH_OR_TERM) {

                  Match *i;
@@ -597,6 +598,7 @@ 

          assert(f);

  

          if (m->type == MATCH_DISCRETE) {

+                 Object *d;

                  uint64_t dp, hash;

  

                  if (JOURNAL_HEADER_KEYED_HASH(f->header))
@@ -604,27 +606,32 @@ 

                  else

                          hash = m->hash;

  

-                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);

+                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, &dp);

                  if (r <= 0)

                          return r;

  

                  /* FIXME: missing: find by monotonic */

  

                  if (j->current_location.type == LOCATION_HEAD)

-                         return journal_file_next_entry_for_data(f, dp, DIRECTION_DOWN, ret, offset);

+                         return journal_file_next_entry_for_data(f, d, DIRECTION_DOWN, ret, offset);

                  if (j->current_location.type == LOCATION_TAIL)

-                         return journal_file_next_entry_for_data(f, dp, DIRECTION_UP, ret, offset);

+                         return journal_file_next_entry_for_data(f, d, DIRECTION_UP, ret, offset);

                  if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))

-                         return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);

+                         return journal_file_move_to_entry_by_seqnum_for_data(f, d, j->current_location.seqnum, direction, ret, offset);

                  if (j->current_location.monotonic_set) {

-                         r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);

+                         r = journal_file_move_to_entry_by_monotonic_for_data(f, d, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);

                          if (r != -ENOENT)

                                  return r;

+ 

+                         /* The data object might have been invalidated. */

+                         r = journal_file_move_to_object(f, OBJECT_DATA, dp, &d);

+                         if (r < 0)

+                                 return r;

                  }

                  if (j->current_location.realtime_set)

-                         return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);

+                         return journal_file_move_to_entry_by_realtime_for_data(f, d, j->current_location.realtime, direction, ret, offset);

  

-                 return journal_file_next_entry_for_data(f, dp, direction, ret, offset);

+                 return journal_file_next_entry_for_data(f, d, direction, ret, offset);

  

          } else if (m->type == MATCH_OR_TERM) {

                  uint64_t np = 0;
@@ -2296,12 +2303,10 @@ 

          for (i = 0; i < n; i++) {

                  Object *d;

                  uint64_t p, l;

-                 le64_t le_hash;

                  size_t t;

                  int compression;

  

                  p = le64toh(o->entry.items[i].object_offset);

-                 le_hash = o->entry.items[i].hash;

                  r = journal_file_move_to_object(f, OBJECT_DATA, p, &d);

                  if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {

                          log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i);
@@ -2310,11 +2315,6 @@ 

                  if (r < 0)

                          return r;

  

-                 if (le_hash != d->data.hash) {

-                         log_debug("Entry item %"PRIu64" hash is bad, skipping over it.", i);

-                         continue;

-                 }

- 

                  l = le64toh(d->object.size) - offsetof(Object, data.payload);

  

                  compression = d->object.flags & OBJECT_COMPRESSION_MASK;
@@ -2443,10 +2443,8 @@ 

  

          for (uint64_t n = journal_file_entry_n_items(o); j->current_field < n; j->current_field++) {

                  uint64_t p;

-                 le64_t le_hash;

  

                  p = le64toh(o->entry.items[j->current_field].object_offset);

-                 le_hash = o->entry.items[j->current_field].hash;

                  r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);

                  if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {

                          log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", j->current_field);
@@ -2455,11 +2453,6 @@ 

                  if (r < 0)

                          return r;

  

-                 if (le_hash != o->data.hash) {

-                         log_debug("Entry item %"PRIu64" hash is bad, skipping over it.", j->current_field);

-                         continue;

-                 }

- 

                  r = return_data(j, f, o, data, size);

                  if (r == -EBADMSG) {

                          log_debug("Entry item %"PRIu64" data payload is bad, skipping over it.", j->current_field);

file modified
+84
@@ -107,6 +107,47 @@ 

          return 0;

  }

  

+ /* Turns the next 'size' bytes of 'fd', counted from its current file offset, into a hole. Any part of
+  * that range overlapping existing file space is punched out with fallocate(); any part beyond the
+  * current end of the file is created by growing the file with ftruncate(). On success the file offset
+  * is left directly behind the hole and 0 is returned, otherwise a negative errno-style value. */
+ static int create_hole(int fd, off_t size) {
+         off_t start, file_end, available;
+ 
+         start = lseek(fd, 0, SEEK_CUR);
+         if (start < 0)
+                 return -errno;
+ 
+         /* This moves the file offset to the end of the file; every successful path below repositions
+          * it explicitly. */
+         file_end = lseek(fd, 0, SEEK_END);
+         if (file_end < 0)
+                 return -errno;
+ 
+         /* Bytes of existing file space between the original offset and the end of the file (zero or
+          * negative if the offset was already at or past the end). */
+         available = file_end - start;
+ 
+         /* Punch out whatever part of the requested range overlaps existing file space. A "not
+          * supported" failure is tolerated on purpose, any other failure is reported. */
+         if (available > 0 &&
+             fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, start, MIN(size, available)) < 0 &&
+             !ERRNO_IS_NOT_SUPPORTED(errno))
+                 return -errno;
+ 
+         if (available < size) {
+                 /* The hole reaches past the end of the file: grow the file (and thereby the hole) by
+                  * the missing part and park the file offset at the new end. */
+                 if (ftruncate(fd, file_end + (size - available)) < 0)
+                         return -errno;
+ 
+                 if (lseek(fd, 0, SEEK_END) < 0)
+                         return -errno;
+ 
+                 return 0;
+         }
+ 
+         /* The whole hole fits into the existing file space, just step over it. */
+         if (lseek(fd, start + size, SEEK_SET) < 0)
+                 return -errno;
+ 
+         return 0;
+ }

+ 

  int copy_bytes_full(

                  int fdf, int fdt,

                  uint64_t max_bytes,
@@ -202,6 +243,49 @@ 

                  if (max_bytes != UINT64_MAX && m > max_bytes)

                          m = max_bytes;

  

+                 if (copy_flags & COPY_HOLES) {

+                         off_t c, e;

+ 

+                         c = lseek(fdf, 0, SEEK_CUR);

+                         if (c < 0)

+                                 return -errno;

+ 

+                         /* To see if we're in a hole, we search for the next data offset. */

+                         e = lseek(fdf, c, SEEK_DATA);

+                         if (e < 0 && errno == ENXIO)

+                                 /* If errno == ENXIO, that means we've reached the final hole of the file and

+                                 * that hole isn't followed by more data. */

+                                 e = lseek(fdf, 0, SEEK_END);

+                         if (e < 0)

+                                 return -errno;

+ 

+                         /* If we're in a hole (current offset is not a data offset), create a hole of the

+                          * same size in the target file. */

+                         if (e > c) {

+                                 r = create_hole(fdt, e - c);

+                                 if (r < 0)

+                                         return r;

+                         }

+ 

+                         c = e; /* Set c to the start of the data segment. */

+ 

+                         /* After copying a potential hole, find the end of the data segment by looking for

+                          * the next hole. If we get ENXIO, we're at EOF. */

+                         e = lseek(fdf, c, SEEK_HOLE);

+                         if (e < 0) {

+                                 if (errno == ENXIO)

+                                         break;

+                                 return -errno;

+                         }

+ 

+                         /* SEEK_HOLE modifies the file offset so we need to move back to the initial offset. */

+                         if (lseek(fdf, c, SEEK_SET) < 0)

+                                 return -errno;

+ 

+                         /* Make sure we're not copying more than the current data segment. */

+                         m = MIN(m, (size_t) e - c);

+                 }

+ 

                  /* First try copy_file_range(), unless we already tried */

                  if (try_cfr) {

                          n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);

file modified
+1
@@ -24,6 +24,7 @@ 

          COPY_FSYNC_FULL  = 1 << 11, /* fsync_full() after we are done */

          COPY_SYNCFS      = 1 << 12, /* syncfs() the *top-level* dir after we are done */

          COPY_ALL_XATTRS  = 1 << 13, /* Preserve all xattrs when copying, not just those in the user namespace */

+         COPY_HOLES       = 1 << 14, /* Copy holes */

  } CopyFlags;

  

  typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata);

file modified
+54
@@ -323,4 +323,58 @@ 

          assert_se(!isempty(a));

  }

  

+ /* Checks that copy_bytes() with COPY_HOLES reproduces the sparse layout of the source file in the
+  * copy: a leading hole, one block of data and a trailing hole, each one block in size. The test is
+  * skipped if punching a hole into the temporary file is reported as unsupported. */
+ TEST_RET(copy_holes) {
+         char fn[] = "/var/tmp/test-copy-hole-fd-XXXXXX";
+         char fn_copy[] = "/var/tmp/test-copy-hole-fd-XXXXXX";
+         struct stat st;
+         off_t blksz;
+         int r, fd, fd_copy;
+ 
+         fd = mkostemp_safe(fn);
+         assert_se(fd >= 0);
+ 
+         fd_copy = mkostemp_safe(fn_copy);
+         assert_se(fd_copy >= 0);
+ 
+         r = RET_NERRNO(fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 1));
+         if (ERRNO_IS_NOT_SUPPORTED(r)) {
+                 /* Don't leave file descriptors and temporary files behind when skipping. */
+                 close(fd);
+                 close(fd_copy);
+                 unlink(fn);
+                 unlink(fn_copy);
+ 
+                 return log_tests_skipped("Filesystem doesn't support hole punching");
+         }
+         assert_se(r >= 0);
+ 
+         assert_se(fstat(fd, &st) >= 0);
+         blksz = st.st_blksize;
+         assert_se(blksz > 0);
+         char buf[blksz];
+ 
+         /* Fill the data block with a defined, non-zero pattern: writing an uninitialized buffer reads
+          * indeterminate memory, and a file system may store an all-zero block as a hole. */
+         for (off_t i = 0; i < blksz; i++)
+                 buf[i] = 1;
+ 
+         /* We need to make sure to create hole in multiples of the block size, otherwise filesystems (btrfs)
+          * might silently truncate/extend the holes. */
+ 
+         assert_se(lseek(fd, blksz, SEEK_CUR) >= 0);
+         /* A short write would break the layout the checks below rely on. */
+         assert_se(write(fd, buf, blksz) == blksz);
+         assert_se(lseek(fd, 0, SEEK_END) == 2 * blksz);
+         /* Only ftruncate() can create holes at the end of a file. */
+         assert_se(ftruncate(fd, 3 * blksz) >= 0);
+         assert_se(lseek(fd, 0, SEEK_SET) >= 0);
+ 
+         assert_se(copy_bytes(fd, fd_copy, UINT64_MAX, COPY_HOLES) >= 0);
+ 
+         /* Test that the hole starts at the beginning of the file. */
+         assert_se(lseek(fd_copy, 0, SEEK_HOLE) == 0);
+         /* Test that the hole has the expected size. */
+         assert_se(lseek(fd_copy, 0, SEEK_DATA) == blksz);
+         assert_se(lseek(fd_copy, blksz, SEEK_HOLE) == 2 * blksz);
+         assert_se(lseek(fd_copy, 2 * blksz, SEEK_DATA) < 0 && errno == ENXIO);
+ 
+         /* Test that the copied file has the correct size. */
+         assert_se(fstat(fd_copy, &st) >= 0);
+         assert_se(st.st_size == 3 * blksz);
+ 
+         close(fd);
+         close(fd_copy);
+ 
+         unlink(fn);
+         unlink(fn_copy);
+ 
+         return 0;
+ }

+ 

  DEFINE_TEST_MAIN(LOG_DEBUG);

This PR backports all of the journal improvements that have already landed upstream, i.e. everything except the compact mode work. Every commit applied cleanly, the unit tests pass, and the journal still works as expected.

Pull-Request has been merged by daandemeyer

3 years ago