The UEFI specification suggests calling GetMemoryMap immediately
before ExitBootServices to ensure that the loader has the current
memory map. The firmware is able to enforce this since we have to
pass a "MapKey" from the last GetMemoryMap to ExitBootServices.

The T14 AMD gen 1 firmware is quite strict about this, and even a call
to OutputString will invalidate the MapKey. This causes
ExitBootServices to fail, and we enter 9front with boot services still
running. This causes all sorts of problems including leaving the
IOMMU enabled, breaking 9front's PCI drivers.

To fix this, move memconf() to unload(), right before
ExitBootServices. To retain the ability to override the memory map,
check if *e820 is already set and if so, ignore the output of
GetMemoryMap except for the MapKey.
---
Still some room for improvement around the *conf helper functions, but
I think this is fine for now.

I also wonder what we should be doing if GetMemoryMap fails. If we
don't have any potentially valid MapKey, how could ExitBootServices
succeed? Perhaps we could make unload return an error string on
failure, and pass it back from bootkern so that we can inform the user
that something is wrong.
diff a854bb07cd792a52c5e75aabca76c22b7dd18fc6 eb07ce3baa9705a30ecf3cd6b31ff851e24cb3d1 --- a/sys/src/boot/efi/efi.c +++ b/sys/src/boot/efi/efi.c @@ -36,12 +36,6 @@ eficall(ST->BootServices->Stall, (UINTN)us); } -void -unload(void) -{ - eficall(ST->BootServices->ExitBootServices, IH, MK); -} - static void memconf(char **cfg) { @@ -72,6 +66,8 @@ entvers = 1; if(eficall(ST->BootServices->GetMemoryMap, &mapsize, mapbuf, &MK, &entsize, &entvers)) return; + if(cfg == nil) + return; s = *cfg; for(p = mapbuf; mapsize >= entsize; p += entsize, mapsize -= entsize){ @@ -93,7 +89,6 @@ *s = '\0'; if(s > *cfg){ s[-1] = '\n'; - print(*cfg); *cfg = s; } } @@ -276,9 +271,15 @@ void eficonfig(char **cfg) { - memconf(cfg); acpiconf(cfg); screenconf(cfg); +} + +void +unload(char **cfg) +{ + memconf(cfg); + eficall(ST->BootServices->ExitBootServices, IH, MK); } EFI_STATUS --- a/sys/src/boot/efi/fns.h +++ b/sys/src/boot/efi/fns.h @@ -16,7 +16,7 @@ void (*close)(void *f); int readn(void *f, void *data, int len); -void unload(void); +void unload(char **cfg); int getc(void); void putc(int c); --- a/sys/src/boot/efi/sub.c +++ b/sys/src/boot/efi/sub.c @@ -156,6 +156,23 @@ char *confend; +static int +hasconf(char *s) +{ + char *p; + int n; + + n = strlen(s); + for(p = BOOTARGS; n <= confend - p; p++){ + if(memcmp(p, s, n) == 0) + return 1; + p = strchr(p, '\n'); + if(p == nil) + break; + } + return 0; +} + static char* getconf(char *s, char *buf) { @@ -364,7 +381,7 @@ close(f); print("boot\n"); - unload(); + unload(hasconf("*e820=") ? nil : &confend); jump(e);
Oh, this seems to be the exact same problem I have with my devices
here. I was seeing empty NVMe queues after identify (and, reading the
spec, everything else was mostly fine in our driver besides some quirks
to work around shitty controllers), so I banged around to see if the
firmware was messing it up before boot and realized the NVMe feature
was pointing to that. I couldn't figure out that it was actually
something that would need to change in the bootloader, though! (I
banged on pcireset/nssr/initseg() a lot.)
Thanks so much for the find, will test this asap.
On Wed, 16 Nov 2022 at 17:13, Michael Forney <mforney@mforney.org> wrote:
>
>
> The UEFI specification suggests calling GetMemoryMap immediately
> before ExitBootServices to ensure that the loader has the current
> memory map. The firmware is able to enforce this since we have to
> pass a "MapKey" from the last GetMemoryMap to ExitBootServices.
>
> The T14 AMD gen 1 firmware is quite strict about this, and even a call
> to OutputString will invalidate the MapKey. This causes
> ExitBootServices to fail, and we enter 9front with boot services still
> running. This causes all sorts of problems including leaving the
> IOMMU enabled, breaking 9front's PCI drivers.
>
> To fix this, move memconf() to unload(), right before
> ExitBootServices. To retain the ability to override the memory map,
> check if *e820 is already set and if so, ignore the output of
> GetMemoryMap except for the MapKey.
> ---
> Still some room for improvement around the *conf helper functions, but
> I think this is fine for now.
>
> I also wonder what we should be doing if GetMemoryMap fails. If we
> don't have any potentially valid MapKey, how could ExitBootServices
> succeed? Perhaps we could make unload return an error string on
> failure, and pass it back from bootkern so that we can inform the user
> that something is wrong.
>
> diff a854bb07cd792a52c5e75aabca76c22b7dd18fc6 eb07ce3baa9705a30ecf3cd6b31ff851e24cb3d1
> --- a/sys/src/boot/efi/efi.c
> +++ b/sys/src/boot/efi/efi.c
> @@ -36,12 +36,6 @@
> eficall(ST->BootServices->Stall, (UINTN)us);
> }
>
> -void
> -unload(void)
> -{
> - eficall(ST->BootServices->ExitBootServices, IH, MK);
> -}
> -
> static void
> memconf(char **cfg)
> {
> @@ -72,6 +66,8 @@
> entvers = 1;
> if(eficall(ST->BootServices->GetMemoryMap, &mapsize, mapbuf, &MK, &entsize, &entvers))
> return;
> + if(cfg == nil)
> + return;
>
> s = *cfg;
> for(p = mapbuf; mapsize >= entsize; p += entsize, mapsize -= entsize){
> @@ -93,7 +89,6 @@
> *s = '\0';
> if(s > *cfg){
> s[-1] = '\n';
> - print(*cfg);
> *cfg = s;
> }
> }
> @@ -276,9 +271,15 @@
> void
> eficonfig(char **cfg)
> {
> - memconf(cfg);
> acpiconf(cfg);
> screenconf(cfg);
> +}
> +
> +void
> +unload(char **cfg)
> +{
> + memconf(cfg);
> + eficall(ST->BootServices->ExitBootServices, IH, MK);
> }
>
> EFI_STATUS
> --- a/sys/src/boot/efi/fns.h
> +++ b/sys/src/boot/efi/fns.h
> @@ -16,7 +16,7 @@
> void (*close)(void *f);
>
> int readn(void *f, void *data, int len);
> -void unload(void);
> +void unload(char **cfg);
>
> int getc(void);
> void putc(int c);
> --- a/sys/src/boot/efi/sub.c
> +++ b/sys/src/boot/efi/sub.c
> @@ -156,6 +156,23 @@
>
> char *confend;
>
> +static int
> +hasconf(char *s)
> +{
> + char *p;
> + int n;
> +
> + n = strlen(s);
> + for(p = BOOTARGS; n <= confend - p; p++){
> + if(memcmp(p, s, n) == 0)
> + return 1;
> + p = strchr(p, '\n');
> + if(p == nil)
> + break;
> + }
> + return 0;
> +}
> +
> static char*
> getconf(char *s, char *buf)
> {
> @@ -364,7 +381,7 @@
>
> close(f);
> print("boot\n");
> - unload();
> + unload(hasconf("*e820=") ? nil : &confend);
>
> jump(e);
>
Yup, works nicely here, tysm.
On Sat, 19 Nov 2022 at 13:14, Lucas Francesco
<lucas.francesco93@gmail.com> wrote:
>
> Oh, this seems to be the exact same problem I have with my devices
> here. Was having empty nvme queues after identify (and reading the
> spec everything else was mostly fine in our driver besides some quirks
> to workaround shitty controllers) so i banged around to see if
> firmware was messing it up before boot and realized the nvme feature
> was pointing to that, couldn't figure out it was actually something
> that would need to change in the bootloader though! (i banged
> pcireset/nssr/initseg() a lot)
>
> Thanks so much for the find, will test this asap.
>
> On Wed, 16 Nov 2022 at 17:13, Michael Forney <mforney@mforney.org> wrote:
> >
> >
> > The UEFI specification suggests calling GetMemoryMap immediately
> > before ExitBootServices to ensure that the loader has the current
> > memory map. The firmware is able to enforce this since we have to
> > pass a "MapKey" from the last GetMemoryMap to ExitBootServices.
> >
> > The T14 AMD gen 1 firmware is quite strict about this, and even a call
> > to OutputString will invalidate the MapKey. This causes
> > ExitBootServices to fail, and we enter 9front with boot services still
> > running. This causes all sorts of problems including leaving the
> > IOMMU enabled, breaking 9front's PCI drivers.
> >
> > To fix this, move memconf() to unload(), right before
> > ExitBootServices. To retain the ability to override the memory map,
> > check if *e820 is already set and if so, ignore the output of
> > GetMemoryMap except for the MapKey.
> > ---
> > Still some room for improvement around the *conf helper functions, but
> > I think this is fine for now.
> >
> > I also wonder what we should be doing if GetMemoryMap fails. If we
> > don't have any potentially valid MapKey, how could ExitBootServices
> > succeed? Perhaps we could make unload return an error string on
> > failure, and pass it back from bootkern so that we can inform the user
> > that something is wrong.
> >
> > diff a854bb07cd792a52c5e75aabca76c22b7dd18fc6 eb07ce3baa9705a30ecf3cd6b31ff851e24cb3d1
> > --- a/sys/src/boot/efi/efi.c
> > +++ b/sys/src/boot/efi/efi.c
> > @@ -36,12 +36,6 @@
> > eficall(ST->BootServices->Stall, (UINTN)us);
> > }
> >
> > -void
> > -unload(void)
> > -{
> > - eficall(ST->BootServices->ExitBootServices, IH, MK);
> > -}
> > -
> > static void
> > memconf(char **cfg)
> > {
> > @@ -72,6 +66,8 @@
> > entvers = 1;
> > if(eficall(ST->BootServices->GetMemoryMap, &mapsize, mapbuf, &MK, &entsize, &entvers))
> > return;
> > + if(cfg == nil)
> > + return;
> >
> > s = *cfg;
> > for(p = mapbuf; mapsize >= entsize; p += entsize, mapsize -= entsize){
> > @@ -93,7 +89,6 @@
> > *s = '\0';
> > if(s > *cfg){
> > s[-1] = '\n';
> > - print(*cfg);
> > *cfg = s;
> > }
> > }
> > @@ -276,9 +271,15 @@
> > void
> > eficonfig(char **cfg)
> > {
> > - memconf(cfg);
> > acpiconf(cfg);
> > screenconf(cfg);
> > +}
> > +
> > +void
> > +unload(char **cfg)
> > +{
> > + memconf(cfg);
> > + eficall(ST->BootServices->ExitBootServices, IH, MK);
> > }
> >
> > EFI_STATUS
> > --- a/sys/src/boot/efi/fns.h
> > +++ b/sys/src/boot/efi/fns.h
> > @@ -16,7 +16,7 @@
> > void (*close)(void *f);
> >
> > int readn(void *f, void *data, int len);
> > -void unload(void);
> > +void unload(char **cfg);
> >
> > int getc(void);
> > void putc(int c);
> > --- a/sys/src/boot/efi/sub.c
> > +++ b/sys/src/boot/efi/sub.c
> > @@ -156,6 +156,23 @@
> >
> > char *confend;
> >
> > +static int
> > +hasconf(char *s)
> > +{
> > + char *p;
> > + int n;
> > +
> > + n = strlen(s);
> > + for(p = BOOTARGS; n <= confend - p; p++){
> > + if(memcmp(p, s, n) == 0)
> > + return 1;
> > + p = strchr(p, '\n');
> > + if(p == nil)
> > + break;
> > + }
> > + return 0;
> > +}
> > +
> > static char*
> > getconf(char *s, char *buf)
> > {
> > @@ -364,7 +381,7 @@
> >
> > close(f);
> > print("boot\n");
> > - unload();
> > + unload(hasconf("*e820=") ? nil : &confend);
> >
> > jump(e);
> >
Thank you! I finally got around to testing it and it worked fine. I didn't apply the original patch, as I do not like hiding that logic away in a cfg parameter to unload(). The change refactors the config code, adding a findconf() function, and keeps the logic about optionally passing &confend to memconf() instead of moving it to unload(). -- cinap