aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Sema/SemaCUDA.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Sema/SemaCUDA.cpp')
-rw-r--r--lib/Sema/SemaCUDA.cpp93
1 files changed, 40 insertions, 53 deletions
diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp
index 203c09c57112..d0ddfd040c9c 100644
--- a/lib/Sema/SemaCUDA.cpp
+++ b/lib/Sema/SemaCUDA.cpp
@@ -267,6 +267,18 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
CXXMethodDecl *MemberDecl,
bool ConstRHS,
bool Diagnose) {
+ // If the defaulted special member is defined lexically outside of its
+ // owning class, or the special member already has explicit device or host
+ // attributes, do not infer.
+ bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
+ bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
+ bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
+ bool HasExplicitAttr =
+ (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
+ (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
+ if (!InClass || HasExplicitAttr)
+ return false;
+
llvm::Optional<CUDAFunctionTarget> InferredTarget;
// We're going to invoke special member lookup; mark that these special
@@ -371,21 +383,23 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
}
}
+
+ // If no target was inferred, mark this member as __host__ __device__;
+ // it's the least restrictive option that can be invoked from any target.
+ bool NeedsH = true, NeedsD = true;
if (InferredTarget.hasValue()) {
- if (InferredTarget.getValue() == CFT_Device) {
- MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
- } else if (InferredTarget.getValue() == CFT_Host) {
- MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
- } else {
- MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
- MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
- }
- } else {
- // If no target was inferred, mark this member as __host__ __device__;
- // it's the least restrictive option that can be invoked from any target.
+ if (InferredTarget.getValue() == CFT_Device)
+ NeedsH = false;
+ else if (InferredTarget.getValue() == CFT_Host)
+ NeedsD = false;
+ }
+
+ // We either setting attributes first time, or the inferred ones must match
+ // previously set ones.
+ if (NeedsD && !HasD)
MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ if (NeedsH && !HasH)
MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
- }
return false;
}
@@ -586,40 +600,6 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
-// Do we know that we will eventually codegen the given function?
-static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
- // Templates are emitted when they're instantiated.
- if (FD->isDependentContext())
- return false;
-
- // When compiling for device, host functions are never emitted. Similarly,
- // when compiling for host, device and global functions are never emitted.
- // (Technically, we do emit a host-side stub for global functions, but this
- // doesn't count for our purposes here.)
- Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
- if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
- return false;
- if (!S.getLangOpts().CUDAIsDevice &&
- (T == Sema::CFT_Device || T == Sema::CFT_Global))
- return false;
-
- // Check whether this function is externally visible -- if so, it's
- // known-emitted.
- //
- // We have to check the GVA linkage of the function's *definition* -- if we
- // only have a declaration, we don't know whether or not the function will be
- // emitted, because (say) the definition could include "inline".
- FunctionDecl *Def = FD->getDefinition();
-
- if (Def &&
- !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
- return true;
-
- // Otherwise, the function is known-emitted if it's in our set of
- // known-emitted functions.
- return S.DeviceKnownEmittedFns.count(FD) > 0;
-}
-
Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
@@ -633,7 +613,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
// device code if we're compiling for device. Defer any errors in device
// mode until the function is known-emitted.
if (getLangOpts().CUDAIsDevice) {
- return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+ FunctionEmissionStatus::Emitted)
? DeviceDiagBuilder::K_ImmediateWithCallStack
: DeviceDiagBuilder::K_Deferred;
}
@@ -661,7 +642,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
if (getLangOpts().CUDAIsDevice)
return DeviceDiagBuilder::K_Nop;
- return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+ FunctionEmissionStatus::Emitted)
? DeviceDiagBuilder::K_ImmediateWithCallStack
: DeviceDiagBuilder::K_Deferred;
default:
@@ -688,12 +670,16 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// If the caller is known-emitted, mark the callee as known-emitted.
// Otherwise, mark the call in our call graph so we can traverse it later.
- bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+ bool CallerKnownEmitted =
+ getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
if (CallerKnownEmitted) {
// Host-side references to a __global__ function refer to the stub, so the
// function itself is never emitted and therefore should not be marked.
- if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
- markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted);
+ if (!shouldIgnoreInHostDeviceCheck(Callee))
+ markKnownEmitted(
+ *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
+ return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
+ });
} else {
// If we have
// host fn calls kernel fn calls host+device,
@@ -701,7 +687,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// omitting at the call to the kernel from the callgraph. This ensures
// that, when compiling for host, only HD functions actually called from the
// host get marked as known-emitted.
- if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+ if (!shouldIgnoreInHostDeviceCheck(Callee))
DeviceCallGraph[Caller].insert({Callee, Loc});
}
@@ -806,7 +792,8 @@ void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
std::string Sema::getCudaConfigureFuncName() const {
if (getLangOpts().HIP)
- return "hipConfigureCall";
+ return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
+ : "hipConfigureCall";
// New CUDA kernel launch sequence.
if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),