aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Sema/SemaCUDA.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-01-02 19:18:08 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-01-02 19:18:08 +0000
commitbab175ec4b075c8076ba14c762900392533f6ee4 (patch)
tree01f4f29419a2cb10abe13c1e63cd2a66068b0137 /lib/Sema/SemaCUDA.cpp
parent8b7a8012d223fac5d17d16a66bb39168a9a1dfc0 (diff)
downloadsrc-bab175ec4b075c8076ba14c762900392533f6ee4.tar.gz
src-bab175ec4b075c8076ba14c762900392533f6ee4.zip
Vendor import of clang trunk r290819:vendor/clang/clang-trunk-r290819
Notes
Notes: svn path=/vendor/clang/dist/; revision=311118 svn path=/vendor/clang/clang-trunk-r290819/; revision=311119; tag=vendor/clang/clang-trunk-r290819
Diffstat (limited to 'lib/Sema/SemaCUDA.cpp')
-rw-r--r--lib/Sema/SemaCUDA.cpp520
1 files changed, 475 insertions, 45 deletions
diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp
index 90af6d5a927f..6f272ec839f5 100644
--- a/lib/Sema/SemaCUDA.cpp
+++ b/lib/Sema/SemaCUDA.cpp
@@ -18,11 +18,25 @@
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;
+void Sema::PushForceCUDAHostDevice() {
+ assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ ForceCUDAHostDeviceDepth++;
+}
+
+bool Sema::PopForceCUDAHostDevice() {
+ assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ if (ForceCUDAHostDeviceDepth == 0)
+ return false;
+ ForceCUDAHostDeviceDepth--;
+ return true;
+}
+
ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
MultiExprArg ExecConfig,
SourceLocation GGGLoc) {
@@ -40,21 +54,73 @@ ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
/*IsExecConfig=*/true);
}
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) {
+ bool HasHostAttr = false;
+ bool HasDeviceAttr = false;
+ bool HasGlobalAttr = false;
+ bool HasInvalidTargetAttr = false;
+ while (Attr) {
+ switch(Attr->getKind()){
+ case AttributeList::AT_CUDAGlobal:
+ HasGlobalAttr = true;
+ break;
+ case AttributeList::AT_CUDAHost:
+ HasHostAttr = true;
+ break;
+ case AttributeList::AT_CUDADevice:
+ HasDeviceAttr = true;
+ break;
+ case AttributeList::AT_CUDAInvalidTarget:
+ HasInvalidTargetAttr = true;
+ break;
+ default:
+ break;
+ }
+ Attr = Attr->getNext();
+ }
+ if (HasInvalidTargetAttr)
+ return CFT_InvalidTarget;
+
+ if (HasGlobalAttr)
+ return CFT_Global;
+
+ if (HasHostAttr && HasDeviceAttr)
+ return CFT_HostDevice;
+
+ if (HasDeviceAttr)
+ return CFT_Device;
+
+ return CFT_Host;
+}
+
+template <typename A>
+static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+ return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
+ return isa<A>(Attribute) &&
+ !(IgnoreImplicitAttr && Attribute->isImplicit());
+ });
+}
+
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+ bool IgnoreImplicitHDAttr) {
+ // Code that lives outside a function is run on the host.
+ if (D == nullptr)
+ return CFT_Host;
+
if (D->hasAttr<CUDAInvalidTargetAttr>())
return CFT_InvalidTarget;
if (D->hasAttr<CUDAGlobalAttr>())
return CFT_Global;
- if (D->hasAttr<CUDADeviceAttr>()) {
- if (D->hasAttr<CUDAHostAttr>())
+ if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
+ if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
return CFT_HostDevice;
return CFT_Device;
- } else if (D->hasAttr<CUDAHostAttr>()) {
+ } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
return CFT_Host;
- } else if (D->isImplicit()) {
+ } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
// Some implicit declarations (like intrinsic functions) are not marked.
// Set the most lenient target on them for maximal flexibility.
return CFT_HostDevice;
@@ -95,9 +161,8 @@ Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
const FunctionDecl *Callee) {
assert(Callee && "Callee must be valid.");
+ CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
- CUDAFunctionTarget CallerTarget =
- (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
// If one of the targets is invalid, the check always fails, no matter what
// the other target is.
@@ -107,8 +172,7 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
// (a) Can't call global from some contexts until we support CUDA's
// dynamic parallelism.
if (CalleeTarget == CFT_Global &&
- (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
- (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
+ (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
return CFP_Never;
// (b) Calling HostDevice is OK for everyone.
@@ -145,54 +209,31 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
llvm_unreachable("All cases should've been handled by now.");
}
-template <typename T>
-static void EraseUnwantedCUDAMatchesImpl(
- Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
- std::function<const FunctionDecl *(const T &)> FetchDecl) {
+void Sema::EraseUnwantedCUDAMatches(
+ const FunctionDecl *Caller,
+ SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
if (Matches.size() <= 1)
return;
+ using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
+
// Gets the CUDA function preference for a call from Caller to Match.
- auto GetCFP = [&](const T &Match) {
- return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
+ auto GetCFP = [&](const Pair &Match) {
+ return IdentifyCUDAPreference(Caller, Match.second);
};
// Find the best call preference among the functions in Matches.
- Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
+ CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
Matches.begin(), Matches.end(),
- [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));
+ [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
// Erase all functions with lower priority.
Matches.erase(
- llvm::remove_if(Matches,
- [&](const T &Match) { return GetCFP(Match) < BestCFP; }),
+ llvm::remove_if(
+ Matches, [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }),
Matches.end());
}
-void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
- SmallVectorImpl<FunctionDecl *> &Matches){
- EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
- *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
-}
-
-void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
- SmallVectorImpl<DeclAccessPair> &Matches) {
- EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
- *this, Caller, Matches, [](const DeclAccessPair &item) {
- return dyn_cast<FunctionDecl>(item.getDecl());
- });
-}
-
-void Sema::EraseUnwantedCUDAMatches(
- const FunctionDecl *Caller,
- SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
- EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
- *this, Caller, Matches,
- [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
- return dyn_cast<FunctionDecl>(item.second);
- });
-}
-
/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member __host__ and another's is
@@ -441,9 +482,23 @@ bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
// * a __device__ function with this signature was already declared, in which
// case in which case we output an error, unless the __device__ decl is in a
// system header, in which case we leave the constexpr function unattributed.
-void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
+//
+// In addition, all function decls are treated as __host__ __device__ when
+// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
+// #pragma clang force_cuda_host_device_begin/end
+// pair).
+void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
const LookupResult &Previous) {
- assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
+ assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+
+ if (ForceCUDAHostDeviceDepth > 0) {
+ if (!NewD->hasAttr<CUDAHostAttr>())
+ NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+ if (!NewD->hasAttr<CUDADeviceAttr>())
+ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ return;
+ }
+
if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
@@ -480,3 +535,378 @@ void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
+
+// In CUDA, there are some constructs which may appear in semantically-valid
+// code, but trigger errors if we ever generate code for the function in which
+// they appear. Essentially every construct you're not allowed to use on the
+// device falls into this category, because you are allowed to use these
+// constructs in a __host__ __device__ function, but only if that function is
+// never codegen'ed on the device.
+//
+// To handle semantic checking for these constructs, we keep track of the set of
+// functions we know will be emitted, either because we could tell a priori that
+// they would be emitted, or because they were transitively called by a
+// known-emitted function.
+//
+// We also keep a partial call graph of which not-known-emitted functions call
+// which other not-known-emitted functions.
+//
+// When we see something which is illegal if the current function is emitted
+// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
+// CheckCUDACall), we first check if the current function is known-emitted. If
+// so, we immediately output the diagnostic.
+//
+// Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags
+// until we discover that the function is known-emitted, at which point we take
+// it out of this map and emit the diagnostic.
+
+Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
+ unsigned DiagID, FunctionDecl *Fn,
+ Sema &S)
+ : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn),
+ ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) {
+ switch (K) {
+ case K_Nop:
+ break;
+ case K_Immediate:
+ case K_ImmediateWithCallStack:
+ ImmediateDiag.emplace(S.Diag(Loc, DiagID));
+ break;
+ case K_Deferred:
+ assert(Fn && "Must have a function to attach the deferred diag to.");
+ PartialDiag.emplace(S.PDiag(DiagID));
+ break;
+ }
+}
+
+// Print notes showing how we can reach FD starting from an a priori
+// known-callable function.
+static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) {
+ auto FnIt = S.CUDAKnownEmittedFns.find(FD);
+ while (FnIt != S.CUDAKnownEmittedFns.end()) {
+ DiagnosticBuilder Builder(
+ S.Diags.Report(FnIt->second.Loc, diag::note_called_by));
+ Builder << FnIt->second.FD;
+ Builder.setForceEmit();
+
+ FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD);
+ }
+}
+
+Sema::CUDADiagBuilder::~CUDADiagBuilder() {
+ if (ImmediateDiag) {
+ // Emit our diagnostic and, if it was a warning or error, output a callstack
+ // if Fn isn't a priori known-emitted.
+ bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel(
+ DiagID, Loc) >= DiagnosticsEngine::Warning;
+ ImmediateDiag.reset(); // Emit the immediate diag.
+ if (IsWarningOrError && ShowCallStack)
+ EmitCallStackNotes(S, Fn);
+ } else if (PartialDiag) {
+ assert(ShowCallStack && "Must always show call stack for deferred diags.");
+ S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)});
+ }
+}
+
+// Do we know that we will eventually codegen the given function?
+static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
+ // Templates are emitted when they're instantiated.
+ if (FD->isDependentContext())
+ return false;
+
+ // When compiling for device, host functions are never emitted. Similarly,
+ // when compiling for host, device and global functions are never emitted.
+ // (Technically, we do emit a host-side stub for global functions, but this
+ // doesn't count for our purposes here.)
+ Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
+ if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
+ return false;
+ if (!S.getLangOpts().CUDAIsDevice &&
+ (T == Sema::CFT_Device || T == Sema::CFT_Global))
+ return false;
+
+ // Check whether this function is externally visible -- if so, it's
+ // known-emitted.
+ //
+ // We have to check the GVA linkage of the function's *definition* -- if we
+ // only have a declaration, we don't know whether or not the function will be
+ // emitted, because (say) the definition could include "inline".
+ FunctionDecl *Def = FD->getDefinition();
+
+ // We may currently be parsing the body of FD, in which case
+ // FD->getDefinition() will be null, but we still want to treat FD as though
+ // it's a definition.
+ if (!Def && FD->willHaveBody())
+ Def = FD;
+
+ if (Def &&
+ !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
+ return true;
+
+ // Otherwise, the function is known-emitted if it's in our set of
+ // known-emitted functions.
+ return S.CUDAKnownEmittedFns.count(FD) > 0;
+}
+
+Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
+ unsigned DiagID) {
+ assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (CurrentCUDATarget()) {
+ case CFT_Global:
+ case CFT_Device:
+ return CUDADiagBuilder::K_Immediate;
+ case CFT_HostDevice:
+ // An HD function counts as host code if we're compiling for host, and
+ // device code if we're compiling for device. Defer any errors in device
+ // mode until the function is known-emitted.
+ if (getLangOpts().CUDAIsDevice) {
+ return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ ? CUDADiagBuilder::K_ImmediateWithCallStack
+ : CUDADiagBuilder::K_Deferred;
+ }
+ return CUDADiagBuilder::K_Nop;
+
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
+ return CUDADiagBuilder(DiagKind, Loc, DiagID,
+ dyn_cast<FunctionDecl>(CurContext), *this);
+}
+
+Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
+ unsigned DiagID) {
+ assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (CurrentCUDATarget()) {
+ case CFT_Host:
+ return CUDADiagBuilder::K_Immediate;
+ case CFT_HostDevice:
+ // An HD function counts as host code if we're compiling for host, and
+ // device code if we're compiling for device. Defer any errors in device
+ // mode until the function is known-emitted.
+ if (getLangOpts().CUDAIsDevice)
+ return CUDADiagBuilder::K_Nop;
+
+ return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ ? CUDADiagBuilder::K_ImmediateWithCallStack
+ : CUDADiagBuilder::K_Deferred;
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
+ return CUDADiagBuilder(DiagKind, Loc, DiagID,
+ dyn_cast<FunctionDecl>(CurContext), *this);
+}
+
+// Emit any deferred diagnostics for FD and erase them from the map in which
+// they're stored.
+static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
+ auto It = S.CUDADeferredDiags.find(FD);
+ if (It == S.CUDADeferredDiags.end())
+ return;
+ bool HasWarningOrError = false;
+ for (PartialDiagnosticAt &PDAt : It->second) {
+ const SourceLocation &Loc = PDAt.first;
+ const PartialDiagnostic &PD = PDAt.second;
+ HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel(
+ PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning;
+ DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
+ Builder.setForceEmit();
+ PD.Emit(Builder);
+ }
+ S.CUDADeferredDiags.erase(It);
+
+ // FIXME: Should this be called after every warning/error emitted in the loop
+ // above, instead of just once per function? That would be consistent with
+ // how we handle immediate errors, but it also seems like a bit much.
+ if (HasWarningOrError)
+ EmitCallStackNotes(S, FD);
+}
+
+// Indicate that this function (and thus everything it transtively calls) will
+// be codegen'ed, and emit any deferred diagnostics on this function and its
+// (transitive) callees.
+static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller,
+ FunctionDecl *OrigCallee, SourceLocation OrigLoc) {
+ // Nothing to do if we already know that FD is emitted.
+ if (IsKnownEmitted(S, OrigCallee)) {
+ assert(!S.CUDACallGraph.count(OrigCallee));
+ return;
+ }
+
+ // We've just discovered that OrigCallee is known-emitted. Walk our call
+ // graph to see what else we can now discover also must be emitted.
+
+ struct CallInfo {
+ FunctionDecl *Caller;
+ FunctionDecl *Callee;
+ SourceLocation Loc;
+ };
+ llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}};
+ llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen;
+ Seen.insert(OrigCallee);
+ while (!Worklist.empty()) {
+ CallInfo C = Worklist.pop_back_val();
+ assert(!IsKnownEmitted(S, C.Callee) &&
+ "Worklist should not contain known-emitted functions.");
+ S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc};
+ EmitDeferredDiags(S, C.Callee);
+
+ // If this is a template instantiation, explore its callgraph as well:
+ // Non-dependent calls are part of the template's callgraph, while dependent
+ // calls are part of to the instantiation's call graph.
+ if (auto *Templ = C.Callee->getPrimaryTemplate()) {
+ FunctionDecl *TemplFD = Templ->getAsFunction();
+ if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
+ Seen.insert(TemplFD);
+ Worklist.push_back(
+ {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc});
+ }
+ }
+
+ // Add all functions called by Callee to our worklist.
+ auto CGIt = S.CUDACallGraph.find(C.Callee);
+ if (CGIt == S.CUDACallGraph.end())
+ continue;
+
+ for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc :
+ CGIt->second) {
+ FunctionDecl *NewCallee = FDLoc.first;
+ SourceLocation CallLoc = FDLoc.second;
+ if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee))
+ continue;
+ Seen.insert(NewCallee);
+ Worklist.push_back(
+ {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc});
+ }
+
+ // C.Callee is now known-emitted, so we no longer need to maintain its list
+ // of callees in CUDACallGraph.
+ S.CUDACallGraph.erase(CGIt);
+ }
+}
+
+bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
+ assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ assert(Callee && "Callee may not be null.");
+ // FIXME: Is bailing out early correct here? Should we instead assume that
+ // the caller is a global initializer?
+ FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+ if (!Caller)
+ return true;
+
+ // If the caller is known-emitted, mark the callee as known-emitted.
+ // Otherwise, mark the call in our call graph so we can traverse it later.
+ bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+ if (CallerKnownEmitted)
+ MarkKnownEmitted(*this, Caller, Callee, Loc);
+ else {
+ // If we have
+ // host fn calls kernel fn calls host+device,
+ // the HD function does not get instantiated on the host. We model this by
+ // omitting at the call to the kernel from the callgraph. This ensures
+ // that, when compiling for host, only HD functions actually called from the
+ // host get marked as known-emitted.
+ if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+ CUDACallGraph[Caller].insert({Callee, Loc});
+ }
+
+ CUDADiagBuilder::Kind DiagKind = [&] {
+ switch (IdentifyCUDAPreference(Caller, Callee)) {
+ case CFP_Never:
+ return CUDADiagBuilder::K_Immediate;
+ case CFP_WrongSide:
+ assert(Caller && "WrongSide calls require a non-null caller");
+ // If we know the caller will be emitted, we know this wrong-side call
+ // will be emitted, so it's an immediate error. Otherwise, defer the
+ // error until we know the caller is emitted.
+ return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack
+ : CUDADiagBuilder::K_Deferred;
+ default:
+ return CUDADiagBuilder::K_Nop;
+ }
+ }();
+
+ if (DiagKind == CUDADiagBuilder::K_Nop)
+ return true;
+
+ // Avoid emitting this error twice for the same location. Using a hashtable
+ // like this is unfortunate, but because we must continue parsing as normal
+ // after encountering a deferred error, it's otherwise very tricky for us to
+ // ensure that we only emit this deferred error once.
+ if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
+ return true;
+
+ CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
+ << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
+ CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
+ Caller, *this)
+ << Callee;
+ return DiagKind != CUDADiagBuilder::K_Immediate &&
+ DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack;
+}
+
+void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
+ assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
+ return;
+ FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
+ if (!CurFn)
+ return;
+ CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
+ if (Target == CFT_Global || Target == CFT_Device) {
+ Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ } else if (Target == CFT_HostDevice) {
+ Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
+ }
+}
+
+void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
+ const LookupResult &Previous) {
+ assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+ CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
+ for (NamedDecl *OldND : Previous) {
+ FunctionDecl *OldFD = OldND->getAsFunction();
+ if (!OldFD)
+ continue;
+
+ CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
+ // Don't allow HD and global functions to overload other functions with the
+ // same signature. We allow overloading based on CUDA attributes so that
+ // functions can have different implementations on the host and device, but
+ // HD/global functions "exist" in some sense on both the host and device, so
+ // should have the same implementation on both sides.
+ if (NewTarget != OldTarget &&
+ ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
+ (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
+ !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
+ /* ConsiderCudaAttrs = */ false)) {
+ Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
+ << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
+ Diag(OldFD->getLocation(), diag::note_previous_declaration);
+ NewFD->setInvalidDecl();
+ break;
+ }
+ }
+}
+
+template <typename AttrTy>
+static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
+ const FunctionDecl &TemplateFD) {
+ if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
+ AttrTy *Clone = Attribute->clone(S.Context);
+ Clone->setInherited(true);
+ FD->addAttr(Clone);
+ }
+}
+
+void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
+ const FunctionTemplateDecl &TD) {
+ const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
+ copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
+ copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
+ copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
+}