Development discussion of WireGuard
 help / color / mirror / Atom feed
* [PATCH 0/1] wintun: use standard volatile semantics
@ 2020-04-24 19:40 Shawn Hoffman
  2020-04-24 19:40 ` [PATCH 1/1] replace atomic.h with provided APIs switch to /volatile:iso Shawn Hoffman
  2020-04-25  6:23 ` [PATCH 0/1] wintun: use standard volatile semantics Simon Rozman
  0 siblings, 2 replies; 4+ messages in thread
From: Shawn Hoffman @ 2020-04-24 19:40 UTC (permalink / raw)
  To: wireguard; +Cc: Shawn Hoffman

Make all archs are use the standardized concept of volatile.
This patch will cause the most changes to arm64 codegen, and has
yet to be tested on arm64 so is currently being submitted for
comments. If someone would like to test on arm64 it would be
appreciated. I do have an arm64 device, but it seems there's no
existing arm64/windows wireguard binary package, so I can't
just install such a thing and swap out the driver.

Shawn Hoffman (1):
  replace atomic.h with provided APIs switch to /volatile:iso

 wintun.c               | 76 +++++++++++++++++++++---------------------
 wintun.vcxproj         |  5 ++-
 wintun.vcxproj.filters |  3 --
 3 files changed, 40 insertions(+), 44 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/1] replace atomic.h with provided APIs switch to /volatile:iso
  2020-04-24 19:40 [PATCH 0/1] wintun: use standard volatile semantics Shawn Hoffman
@ 2020-04-24 19:40 ` Shawn Hoffman
  2020-04-25  6:23 ` [PATCH 0/1] wintun: use standard volatile semantics Simon Rozman
  1 sibling, 0 replies; 4+ messages in thread
From: Shawn Hoffman @ 2020-04-24 19:40 UTC (permalink / raw)
  To: wireguard; +Cc: Shawn Hoffman

---
 wintun.c               | 76 +++++++++++++++++++++---------------------
 wintun.vcxproj         |  5 ++-
 wintun.vcxproj.filters |  3 --
 3 files changed, 40 insertions(+), 44 deletions(-)

diff --git a/wintun.c b/wintun.c
index a6a0e16..b042d35 100644
--- a/wintun.c
+++ b/wintun.c
@@ -9,7 +9,6 @@
 #include <ndis.h>
 #include <ntstrsafe.h>
 #include "undocumented.h"
-#include "atomic.h"
 
 #pragma warning(disable : 4100) /* unreferenced formal parameter */
 #pragma warning(disable : 4200) /* nonstandard: zero-sized array in struct/union */
@@ -250,7 +249,7 @@ TunSendNetBufferLists(
 
     KIRQL Irql = ExAcquireSpinLockShared(&Ctx->TransitionLock);
     NDIS_STATUS Status;
-    if ((Status = NDIS_STATUS_PAUSED, !InterlockedGet(&Ctx->Running)) ||
+    if ((Status = NDIS_STATUS_PAUSED, !ReadAcquire(&Ctx->Running)) ||
         (Status = NDIS_STATUS_MEDIA_DISCONNECTED, KeReadStateEvent(&Ctx->Device.Disconnected)))
         goto skipNbl;
 
@@ -258,7 +257,7 @@ TunSendNetBufferLists(
     ULONG RingCapacity = Ctx->Device.Send.Capacity;
 
     /* Allocate space for packets in the ring. */
-    ULONG RingHead = InterlockedGetU(&Ring->Head);
+    ULONG RingHead = ReadULongAcquire(&Ring->Head);
     if (Status = NDIS_STATUS_ADAPTER_NOT_READY, RingHead >= RingCapacity)
         goto skipNbl;
 
@@ -325,7 +324,7 @@ TunSendNetBufferLists(
     {
         NET_BUFFER_LIST *CompletedNbl = Ctx->Device.Send.ActiveNbls.Head;
         Ctx->Device.Send.ActiveNbls.Head = NET_BUFFER_LIST_NEXT_NBL_EX(CompletedNbl);
-        InterlockedSetU(&Ring->Tail, TunNblGetOffset(CompletedNbl));
+        WriteULongRelease(&Ring->Tail, TunNblGetOffset(CompletedNbl));
         KeSetEvent(Ctx->Device.Send.TailMoved, IO_NETWORK_INCREMENT, FALSE);
         NdisMSendNetBufferListsComplete(
             Ctx->MiniportAdapterHandle, CompletedNbl, NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL);
@@ -343,11 +342,11 @@ skipNbl:
     ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
     NdisMSendNetBufferListsComplete(Ctx->MiniportAdapterHandle, NetBufferLists, 0);
 updateStatistics:
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutOctets, SentPacketsSize);
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutUcastOctets, SentPacketsSize);
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCOutUcastPkts, SentPacketsCount);
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifOutErrors, ErrorPacketsCount);
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifOutDiscards, DiscardedPacketsCount);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifHCOutOctets, SentPacketsSize);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifHCOutUcastOctets, SentPacketsSize);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifHCOutUcastPkts, SentPacketsCount);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifOutErrors, ErrorPacketsCount);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifOutDiscards, DiscardedPacketsCount);
 }
 
 static MINIPORT_CANCEL_SEND TunCancelSend;
@@ -393,15 +392,15 @@ TunReturnNetBufferLists(NDIS_HANDLE MiniportAdapterContext, PNET_BUFFER_LIST Net
             if (!Ctx->Device.Receive.ActiveNbls.Head)
                 KeSetEvent(&Ctx->Device.Receive.ActiveNbls.Empty, IO_NO_INCREMENT, FALSE);
             KeReleaseInStackQueuedSpinLock(&LockHandle);
-            InterlockedSetU(&Ring->Head, TunNblGetOffset(CompletedNbl));
+            WriteULongRelease(&Ring->Head, TunNblGetOffset(CompletedNbl));
             NdisFreeNetBufferList(CompletedNbl);
         }
     }
 
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCInOctets, ReceivedPacketsSize);
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCInUcastOctets, ReceivedPacketsSize);
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifHCInUcastPkts, ReceivedPacketsCount);
-    InterlockedAdd64((LONG64 *)&Ctx->Statistics.ifInErrors, ErrorPacketsCount);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifHCInOctets, ReceivedPacketsSize);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifHCInUcastOctets, ReceivedPacketsSize);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifHCInUcastPkts, ReceivedPacketsCount);
+    InterlockedAddNoFence64((LONG64 *)&Ctx->Statistics.ifInErrors, ErrorPacketsCount);
 }
 
 _IRQL_requires_max_(PASSIVE_LEVEL)
@@ -419,20 +418,20 @@ TunProcessReceiveData(_Inout_ TUN_CTX *Ctx)
     VOID *Events[] = { &Ctx->Device.Disconnected, Ctx->Device.Receive.TailMoved };
     ASSERT(RTL_NUMBER_OF(Events) <= THREAD_WAIT_OBJECTS);
 
-    ULONG RingHead = InterlockedGetU(&Ring->Head);
+    ULONG RingHead = ReadULongAcquire(&Ring->Head);
     if (RingHead >= RingCapacity)
         goto cleanup;
 
     while (!KeReadStateEvent(&Ctx->Device.Disconnected))
     {
         /* Get next packet from the ring. */
-        ULONG RingTail = InterlockedGetU(&Ring->Tail);
+        ULONG RingTail = ReadULongAcquire(&Ring->Tail);
         if (RingHead == RingTail)
         {
             LARGE_INTEGER SpinStart = KeQueryPerformanceCounter(NULL);
             for (;;)
             {
-                RingTail = InterlockedGetU(&Ring->Tail);
+                RingTail = ReadULongAcquire(&Ring->Tail);
                 if (RingTail != RingHead)
                     break;
                 if (KeReadStateEvent(&Ctx->Device.Disconnected))
@@ -444,16 +443,16 @@ TunProcessReceiveData(_Inout_ TUN_CTX *Ctx)
             }
             if (RingHead == RingTail)
             {
-                InterlockedSet(&Ring->Alertable, TRUE);
-                RingTail = InterlockedGetU(&Ring->Tail);
+                WriteRelease(&Ring->Alertable, TRUE);
+                RingTail = ReadULongAcquire(&Ring->Tail);
                 if (RingHead == RingTail)
                 {
                     KeWaitForMultipleObjects(
                         RTL_NUMBER_OF(Events), Events, WaitAny, Executive, KernelMode, FALSE, NULL, NULL);
-                    InterlockedSet(&Ring->Alertable, FALSE);
+                    WriteRelease(&Ring->Alertable, FALSE);
                     continue;
                 }
-                InterlockedSet(&Ring->Alertable, FALSE);
+                WriteRelease(&Ring->Alertable, FALSE);
                 KeClearEvent(Ctx->Device.Receive.TailMoved);
             }
         }
@@ -501,7 +500,7 @@ TunProcessReceiveData(_Inout_ TUN_CTX *Ctx)
         TunNblSetOffsetAndMarkActive(Nbl, RingHead);
 
         KIRQL Irql = ExAcquireSpinLockShared(&Ctx->TransitionLock);
-        if (!InterlockedGet(&Ctx->Running))
+        if (!ReadAcquire(&Ctx->Running))
             goto cleanupNbl;
 
         KLOCK_QUEUE_HANDLE LockHandle;
@@ -530,16 +529,16 @@ TunProcessReceiveData(_Inout_ TUN_CTX *Ctx)
         ExReleaseSpinLockShared(&Ctx->TransitionLock, Irql);
         NdisFreeNetBufferList(Nbl);
     skipNbl:
-        InterlockedIncrement64((LONG64 *)&Ctx->Statistics.ifInDiscards);
+        InterlockedIncrementNoFence64((LONG64 *)&Ctx->Statistics.ifInDiscards);
         KeWaitForSingleObject(&Ctx->Device.Receive.ActiveNbls.Empty, Executive, KernelMode, FALSE, NULL);
-        InterlockedSetU(&Ring->Head, RingHead);
+        WriteULongRelease(&Ring->Head, RingHead);
     }
 
     /* Wait for all NBLs to return: 1. To prevent race between proceeding and invalidating ring head. 2. To have
      * TunDispatchUnregisterBuffers() implicitly wait before releasing ring MDL used by NBL(s). */
     KeWaitForSingleObject(&Ctx->Device.Receive.ActiveNbls.Empty, Executive, KernelMode, FALSE, NULL);
 cleanup:
-    InterlockedSetU(&Ring->Head, MAXULONG);
+    WriteULongRelease(&Ring->Head, MAXULONG);
 }
 
 #define IS_POW2(x) ((x) && !((x) & ((x)-1)))
@@ -595,7 +594,7 @@ TunRegisterBuffers(_Inout_ TUN_CTX *Ctx, _Inout_ IRP *Irp)
     if (Status = STATUS_INSUFFICIENT_RESOURCES, !Ctx->Device.Send.Ring)
         goto cleanupSendUnlockPages;
 
-    Ctx->Device.Send.RingTail = InterlockedGetU(&Ctx->Device.Send.Ring->Tail);
+    Ctx->Device.Send.RingTail = ReadULongAcquire(&Ctx->Device.Send.Ring->Tail);
     if (Status = STATUS_INVALID_PARAMETER, Ctx->Device.Send.RingTail >= Ctx->Device.Send.Capacity)
         goto cleanupSendUnlockPages;
 
@@ -709,7 +708,7 @@ TunUnregisterBuffers(_Inout_ TUN_CTX *Ctx, _In_ FILE_OBJECT *Owner)
     }
     ZwClose(Ctx->Device.Receive.Thread);
 
-    InterlockedSetU(&Ctx->Device.Send.Ring->Tail, MAXULONG);
+    WriteULongRelease(&Ctx->Device.Send.Ring->Tail, MAXULONG);
     KeSetEvent(Ctx->Device.Send.TailMoved, IO_NO_INCREMENT, FALSE);
 
     MmUnlockPages(Ctx->Device.Receive.Mdl);
@@ -926,7 +925,7 @@ static NDIS_STATUS
 TunRestart(NDIS_HANDLE MiniportAdapterContext, PNDIS_MINIPORT_RESTART_PARAMETERS MiniportRestartParameters)
 {
     TUN_CTX *Ctx = (TUN_CTX *)MiniportAdapterContext;
-    InterlockedSet(&Ctx->Running, TRUE);
+    WriteRelease(&Ctx->Running, TRUE);
     return NDIS_STATUS_SUCCESS;
 }
 
@@ -937,7 +936,7 @@ TunPause(NDIS_HANDLE MiniportAdapterContext, PNDIS_MINIPORT_PAUSE_PARAMETERS Min
 {
     TUN_CTX *Ctx = (TUN_CTX *)MiniportAdapterContext;
 
-    InterlockedSet(&Ctx->Running, FALSE);
+    WriteRelease(&Ctx->Running, FALSE);
     ExReleaseSpinLockExclusive(
         &Ctx->TransitionLock,
         ExAcquireSpinLockExclusive(&Ctx->TransitionLock)); /* Ensure above change is visible to all readers. */
@@ -1131,9 +1130,10 @@ TunHaltEx(NDIS_HANDLE MiniportAdapterContext, NDIS_HALT_ACTION HaltAction)
         ExAcquireSpinLockExclusive(&Ctx->TransitionLock)); /* Ensure above change is visible to all readers. */
     NdisFreeNetBufferListPool(Ctx->NblPool);
 
-    InterlockedSetPointer(&Ctx->MiniportAdapterHandle, NULL);
-#pragma warning(suppress : 28175)
-    InterlockedSetPointer(&Ctx->FunctionalDeviceObject->Reserved, NULL);
+#pragma warning(suppress : 6387)
+    WritePointerNoFence(&Ctx->MiniportAdapterHandle, NULL);
+#pragma warning(suppress : 6387 28175)
+    WritePointerNoFence(&Ctx->FunctionalDeviceObject->Reserved, NULL);
     KeEnterCriticalRegion();
     ExAcquireResourceExclusiveLite(&TunDispatchCtxGuard, TRUE); /* Ensure above change is visible to all readers. */
     ExReleaseResourceLite(&TunDispatchCtxGuard);
@@ -1242,16 +1242,16 @@ TunOidQuery(_Inout_ TUN_CTX *Ctx, _Inout_ NDIS_OID_REQUEST *OidRequest)
     case OID_GEN_XMIT_OK:
         return TunOidQueryWrite32or64(
             OidRequest,
-            InterlockedGet64((LONG64 *)&Ctx->Statistics.ifHCOutUcastPkts) +
-                InterlockedGet64((LONG64 *)&Ctx->Statistics.ifHCOutMulticastPkts) +
-                InterlockedGet64((LONG64 *)&Ctx->Statistics.ifHCOutBroadcastPkts));
+            ReadNoFence64((LONG64 *)&Ctx->Statistics.ifHCOutUcastPkts) +
+                ReadNoFence64((LONG64 *)&Ctx->Statistics.ifHCOutMulticastPkts) +
+                ReadNoFence64((LONG64 *)&Ctx->Statistics.ifHCOutBroadcastPkts));
 
     case OID_GEN_RCV_OK:
         return TunOidQueryWrite32or64(
             OidRequest,
-            InterlockedGet64((LONG64 *)&Ctx->Statistics.ifHCInUcastPkts) +
-                InterlockedGet64((LONG64 *)&Ctx->Statistics.ifHCInMulticastPkts) +
-                InterlockedGet64((LONG64 *)&Ctx->Statistics.ifHCInBroadcastPkts));
+            ReadNoFence64((LONG64 *)&Ctx->Statistics.ifHCInUcastPkts) +
+                ReadNoFence64((LONG64 *)&Ctx->Statistics.ifHCInMulticastPkts) +
+                ReadNoFence64((LONG64 *)&Ctx->Statistics.ifHCInBroadcastPkts));
 
     case OID_GEN_STATISTICS:
         return TunOidQueryWriteBuf(OidRequest, &Ctx->Statistics, (ULONG)sizeof(Ctx->Statistics));
diff --git a/wintun.vcxproj b/wintun.vcxproj
index 3bb61d1..abe4a17 100644
--- a/wintun.vcxproj
+++ b/wintun.vcxproj
@@ -125,7 +125,7 @@
   <ItemDefinitionGroup>
     <ClCompile>
       <PreprocessorDefinitions>NDIS_MINIPORT_DRIVER=1;NDIS620_MINIPORT=1;NDIS683_MINIPORT=1;NDIS_WDM=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalOptions>/volatile:ms %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions>/volatile:iso %(AdditionalOptions)</AdditionalOptions>
       <EnablePREfast>true</EnablePREfast>
     </ClCompile>
     <ResourceCompile>
@@ -171,9 +171,8 @@
     <FilesToPackage Include="$(TargetPath)" Condition="'$(ConfigurationType)'=='Driver' or '$(ConfigurationType)'=='DynamicLibrary'" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="atomic.h" />
     <ClInclude Include="undocumented.h" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets" />
-</Project>
+</Project>
\ No newline at end of file
diff --git a/wintun.vcxproj.filters b/wintun.vcxproj.filters
index 467cc48..3e19120 100644
--- a/wintun.vcxproj.filters
+++ b/wintun.vcxproj.filters
@@ -33,8 +33,5 @@
     <ClInclude Include="undocumented.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="atomic.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file
-- 
2.20.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 0/1] wintun: use standard volatile semantics
  2020-04-24 19:40 [PATCH 0/1] wintun: use standard volatile semantics Shawn Hoffman
  2020-04-24 19:40 ` [PATCH 1/1] replace atomic.h with provided APIs switch to /volatile:iso Shawn Hoffman
@ 2020-04-25  6:23 ` Simon Rozman
  2020-04-25  8:51   ` Jason A. Donenfeld
  1 sibling, 1 reply; 4+ messages in thread
From: Simon Rozman @ 2020-04-25  6:23 UTC (permalink / raw)
  To: Shawn Hoffman, wireguard

[-- Attachment #1: Type: text/plain, Size: 2366 bytes --]

Hi Shawn!

Thank you. This generally looks good. I can't find any official documentation on the set of functions and macros from wdm.h, but that's nothing new with Microsoft. 

I have some nitpicks thou.

1. As the atomic.h is no longer used; it could be deleted from the repo.
2. By removing #include "atomic.h", you should add #include <wdm.h>. wintun.c is now directly using functions and macros declared in wdm.h.
3. Please add "Signed-of-by: Shawn Hoffman <godisgovernment@gmail.com>" line to your commit message.
4. You are right, there is no WireGuard for Windows 10 ARM64 yet, because there is no MinGW and Go support for Windows 10 ARM64 yet. If we can get arm-w64-mingw32-native, we could use set GOARCH=arm to compile 32-bit ARM wireguard.exe. 32-bit ARM runs at native speed on Windows 10 ARM64. The challenge I see is making SetupAPI work. I do know the SetupAPI does not work in x86 processes on AMD64 Windows. The System32 folder deflection makes it fail when attempting to create a device. I am expecting the same in ARM process on ARM64 Windows.

I have the ARM/ARM64 support high on my TODO list. I do hope to find some time next week to play with your patch and WireGuard ARM.

Regards, Simon

-----Original Message-----
From: WireGuard <wireguard-bounces@lists.zx2c4.com> on behalf of Shawn Hoffman <godisgovernment@gmail.com>
Date: Friday, 24. April 2020 at 22:52
To: "wireguard@lists.zx2c4.com" <wireguard@lists.zx2c4.com>
Cc: Shawn Hoffman <godisgovernment@gmail.com>
Subject: [PATCH 0/1] wintun: use standard volatile semantics

    Make all archs are use the standardized concept of volatile.
    This patch will cause the most changes to arm64 codegen, and has
    yet to be tested on arm64 so is currently being submitted for
    comments. If someone would like to test on arm64 it would be
    appreciated. I do have an arm64 device, but it seems there's no
    existing arm64/windows wireguard binary package, so I can't
    just install such a thing and swap out the driver.

    Shawn Hoffman (1):
      replace atomic.h with provided APIs switch to /volatile:iso

     wintun.c               | 76 +++++++++++++++++++++---------------------
     wintun.vcxproj         |  5 ++-
     wintun.vcxproj.filters |  3 --
     3 files changed, 40 insertions(+), 44 deletions(-)

    -- 
    2.20.1


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 2965 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 0/1] wintun: use standard volatile semantics
  2020-04-25  6:23 ` [PATCH 0/1] wintun: use standard volatile semantics Simon Rozman
@ 2020-04-25  8:51   ` Jason A. Donenfeld
  0 siblings, 0 replies; 4+ messages in thread
From: Jason A. Donenfeld @ 2020-04-25  8:51 UTC (permalink / raw)
  To: Simon Rozman; +Cc: Shawn Hoffman, wireguard

On Sat, Apr 25, 2020 at 12:23 AM Simon Rozman <simon@rozman.si> wrote:
>
> Hi Shawn!
>
> Thank you. This generally looks good. I can't find any official documentation on the set of functions and macros from wdm.h, but that's nothing new with Microsoft.
>
> I have some nitpicks thou.
>
> 1. As the atomic.h is no longer used; it could be deleted from the repo.
> 2. By removing #include "atomic.h", you should add #include <wdm.h>. wintun.c is now directly using functions and macros declared in wdm.h.
> 3. Please add "Signed-of-by: Shawn Hoffman <godisgovernment@gmail.com>" line to your commit message.

He resubmitted with his S-o-b line. See that patch series for the latest.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-04-25  8:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-24 19:40 [PATCH 0/1] wintun: use standard volatile semantics Shawn Hoffman
2020-04-24 19:40 ` [PATCH 1/1] replace atomic.h with provided APIs switch to /volatile:iso Shawn Hoffman
2020-04-25  6:23 ` [PATCH 0/1] wintun: use standard volatile semantics Simon Rozman
2020-04-25  8:51   ` Jason A. Donenfeld

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).