//===- ACCImplicitData.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements the OpenACC specification for "Variables with
// Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2).
//
// Overview:
// ---------
// The pass automatically generates data clause operations for variables used
// within OpenACC compute constructs (parallel, kernels, serial) that do not
// already have explicit data clauses. The semantics follow these rules:
//
// 1. If there is a default(none) clause visible, no implicit data actions
//    apply.
//
// 2. An aggregate variable (arrays, derived types, etc.) will be treated as:
//    - In a present clause when default(present) is visible.
//    - In a copy clause otherwise.
//
// 3. A scalar variable will be treated as if it appears in:
//    - A copy clause if the compute construct is a kernels construct.
//    - A firstprivate clause otherwise (parallel, serial).
//
// Requirements:
// -------------
// To use this pass in a pipeline, the following requirements must be met:
//
// 1. Type Interface Implementation: Variables from the dialect being used
//    must implement one or both of the following MLIR interfaces:
//    `acc::MappableType` and/or `acc::PointerLikeType`
//
//    These interfaces provide the necessary methods for the pass to:
//    - Determine variable type categories (scalar vs. aggregate)
//    - Generate appropriate bounds information
//    - Generate privatization recipes
//
// 2. Operation Interface Implementation: Operations that access partial
//    entities or create views should implement the following MLIR
//    interfaces: `acc::PartialEntityAccess` and/or
//    `mlir::ViewLikeOpInterface`
//
//    These interfaces are used for proper data clause ordering, ensuring
//    that base entities are mapped before derived entities (e.g., a
//    struct is mapped before its fields, an array is mapped before
//    subarray views).
//
// 3. Analysis Registration (Optional): If custom behavior is needed for
//    variable name extraction or alias analysis, the dialect should
//    pre-register the `acc::OpenACCSupport` and `mlir::AliasAnalysis` analyses.
//
//    If not registered, default behavior will be used.
//
// Implementation Details:
// -----------------------
// The pass performs the following operations:
//
// 1. Finds candidate variables which are live-in to the compute region and
//    are not already in a data clause or private clause.
//
// 2. Generates both data "entry" and "exit" clause operations that match
//    the intended action depending on variable type:
//    - copy -> acc.copyin (entry) + acc.copyout (exit)
//    - present -> acc.present (entry) + acc.delete (exit)
//    - firstprivate -> acc.firstprivate (entry only, no exit)
//
// 3. Ensures that default clause is taken into consideration by looking
//    through current construct and parent constructs to find the "visible
//    default clause".
//
// 4. Fixes up SSA value links so that uses in the acc region reference the
//    result of the newly created data clause operations.
//
// 5. When generating implicit data clause operations, it also adds variable
//    name information and marks them with the implicit flag.
//
// 6. Recipes are generated by calling the appropriate entrypoints in the
//    MappableType and PointerLikeType interfaces.
//
// 7. AliasAnalysis is used to determine if a variable is already covered by
//    an existing data clause (e.g., an interior pointer covered by its parent).
//
// Examples:
// ---------
//
// Example 1: Scalar in parallel construct (implicit firstprivate)
//
// Before:
//   func.func @test() {
//     %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32>
//     acc.parallel {
//       %val = memref.load %scalar[] : memref<f32>
//       acc.yield
//     }
//   }
//
// After:
//   func.func @test() {
//     %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32>
//     %firstpriv = acc.firstprivate varPtr(%scalar : memref<f32>)
//                    -> memref<f32> {implicit = true, name = "x"}
//     acc.parallel firstprivate(@recipe -> %firstpriv : memref<f32>) {
//       %val = memref.load %firstpriv[] : memref<f32>
//       acc.yield
//     }
//   }
//
// Example 2: Scalar in kernels construct (implicit copy)
//
// Before:
//   func.func @test() {
//     %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32>
//     acc.kernels {
//       %val = memref.load %scalar[] : memref<i32>
//       acc.terminator
//     }
//   }
//
// After:
//   func.func @test() {
//     %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32>
//     %copyin = acc.copyin varPtr(%scalar : memref<i32>) -> memref<i32>
//                 {dataClause = #acc<data_clause acc_copy>,
//                  implicit = true, name = "n"}
//     acc.kernels dataOperands(%copyin : memref<i32>) {
//       %val = memref.load %copyin[] : memref<i32>
//       acc.terminator
//     }
//     acc.copyout accPtr(%copyin : memref<i32>)
//                 to varPtr(%scalar : memref<i32>)
//                 {dataClause = #acc<data_clause acc_copy>,
//                  implicit = true, name = "n"}
//   }
//
// Example 3: Array (aggregate) in parallel (implicit copy)
//
// Before:
//   func.func @test() {
//     %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
//     acc.parallel {
//       %c0 = arith.constant 0 : index
//       %val = memref.load %array[%c0] : memref<100xf32>
//       acc.yield
//     }
//   }
//
// After:
//   func.func @test() {
//     %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
//     %copyin = acc.copyin varPtr(%array : memref<100xf32>)
//                 -> memref<100xf32>
//                 {dataClause = #acc<data_clause acc_copy>,
//                  implicit = true, name = "arr"}
//     acc.parallel dataOperands(%copyin : memref<100xf32>) {
//       %c0 = arith.constant 0 : index
//       %val = memref.load %copyin[%c0] : memref<100xf32>
//       acc.yield
//     }
//     acc.copyout accPtr(%copyin : memref<100xf32>)
//                 to varPtr(%array : memref<100xf32>)
//                 {dataClause = #acc<data_clause acc_copy>,
//                  implicit = true, name = "arr"}
//   }
//
// Example 4: Array with default(present)
//
// Before:
//   func.func @test() {
//     %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
//     acc.parallel {
//       %c0 = arith.constant 0 : index
//       %val = memref.load %array[%c0] : memref<100xf32>
//       acc.yield
//     } attributes {defaultAttr = #acc<defaultvalue present>}
//   }
//
// After:
//   func.func @test() {
//     %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
//     %present = acc.present varPtr(%array : memref<100xf32>)
//                  -> memref<100xf32>
//                  {implicit = true, name = "arr"}
//     acc.parallel dataOperands(%present : memref<100xf32>)
//                  attributes {defaultAttr = #acc<defaultvalue present>} {
//       %c0 = arith.constant 0 : index
//       %val = memref.load %present[%c0] : memref<100xf32>
//       acc.yield
//     }
//     acc.delete accPtr(%present : memref<100xf32>)
//                {dataClause = #acc<data_clause acc_present>,
//                 implicit = true, name = "arr"}
//   }
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
#include "llvm/ADT/SmallVectorExtras.h"

#include "mlir/Analysis/AliasAnalysis.h"
#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/Dialect/OpenACC/OpenACCUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/ErrorHandling.h"
#include <type_traits>

namespace mlir {
namespace acc {
#define GEN_PASS_DEF_ACCIMPLICITDATA
#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
} // namespace acc
} // namespace mlir

#define DEBUG_TYPE "acc-implicit-data"

using namespace mlir;

namespace {

/// Pass that materializes implicit data clause operations for values that are
/// live-in to OpenACC compute constructs (and `acc.kernel_environment`) but
/// are not yet covered by a data clause operation.
class ACCImplicitData : public acc::impl::ACCImplicitDataBase<ACCImplicitData> {
public:
  using acc::impl::ACCImplicitDataBase<ACCImplicitData>::ACCImplicitDataBase;

  /// Pass entry point: walks the module and processes every compute construct.
  void runOnOperation() override;

private:
  /// Looks through the `dominatingDataClauses` to find the original data clause
  /// op for an alias. Only copyin, create, present, no_create and deviceptr
  /// ops whose variable must-alias `var` are accepted; the first match is
  /// returned. Returns nullptr if no original data clause op is found.
  template <typename OpT>
  Operation *getOriginalDataClauseOpForAlias(
      Value var, OpBuilder &builder, OpT computeConstructOp,
      const SmallVector<Value> &dominatingDataClauses);

  /// Generates the appropriate `acc.copyin`, `acc.present`, `acc.firstprivate`,
  /// etc. data clause op for a candidate variable, using the builder's current
  /// insertion point. Returns nullptr when none of the implicit data rules
  /// applies to the variable, in which case no operation is created.
  template <typename OpT>
  Operation *generateDataClauseOpForCandidate(
      Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp,
      const SmallVector<Value> &dominatingDataClauses,
      const std::optional<acc::ClauseDefaultValue> &defaultClause);

  /// Generates the implicit data ops for a compute construct. Does nothing
  /// when `defaultClause` holds `default(none)`.
  template <typename OpT>
  void
  generateImplicitDataOps(ModuleOp &module, OpT computeConstructOp,
                          std::optional<acc::ClauseDefaultValue> &defaultClause,
                          acc::OpenACCSupport &accSupport);

  /// Generates a private recipe for a variable. An existing recipe with the
  /// same name in `module` is reused; otherwise a new one is created at the
  /// start of the module body. Returns a null op (after emitting a
  /// not-yet-implemented diagnostic) when the recipe cannot be created.
  acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, Value var,
                                             Location loc, OpBuilder &builder,
                                             acc::OpenACCSupport &accSupport);

  /// Generates a firstprivate recipe for a variable. Follows the same
  /// reuse/create/diagnose scheme as `generatePrivateRecipe`.
  acc::FirstprivateRecipeOp
  generateFirstprivateRecipe(ModuleOp &module, Value var, Location loc,
                             OpBuilder &builder,
                             acc::OpenACCSupport &accSupport);

  /// Generates recipes for a list of variables. Each entry of `newOperands`
  /// defined by an `acc.private` or `acc.firstprivate` op gets the matching
  /// recipe symbol attached; any other operand is reported as
  /// not-yet-implemented.
  void generateRecipes(ModuleOp &module, OpBuilder &builder,
                       Operation *computeConstructOp,
                       const SmallVector<Value> &newOperands);
};

/// Decides whether `val`, a value used inside `accRegion`, needs an implicit
/// data clause generated for it.
///
/// A value qualifies when its type is pointer-like or mappable, it is not
/// already produced by a data entry operation, and it is either device data
/// or a value whose use in the region `accSupport` does not accept as-is.
static bool isCandidateForImplicitData(Value val, Region &accRegion,
                                       acc::OpenACCSupport &accSupport) {
  // Only pointer-like or mappable types may appear in a data clause.
  Type valTy = val.getType();
  const bool hasDataClauseType =
      acc::isPointerLikeType(valTy) || acc::isMappableType(valTy);
  if (!hasDataClauseType)
    return false;

  // Results of data entry operations are already mapped; nothing to add.
  if (Operation *definingOp = val.getDefiningOp())
    if (isa<ACC_DATA_ENTRY_OPS>(definingOp))
      return false;

  // Device data always needs a clause (it will receive a deviceptr clause).
  if (acc::isDeviceValue(val))
    return true;

  // Everything else is a candidate unless its use is already valid.
  return !accSupport.isValidValueUse(val, accRegion);
}

/// Scans `dominatingDataClauses` in order and returns the first data clause
/// operation that guarantees presence (copyin, create, present, no_create or
/// deviceptr) and whose variable must-alias `var`. Returns nullptr when there
/// is no such operation.
template <typename OpT>
Operation *ACCImplicitData::getOriginalDataClauseOpForAlias(
    Value var, OpBuilder &builder, OpT computeConstructOp,
    const SmallVector<Value> &dominatingDataClauses) {
  auto &aliasAnalysis = this->getAnalysis<AliasAnalysis>();
  for (Value clauseResult : dominatingDataClauses) {
    Operation *clauseOp = clauseResult.getDefiningOp();
    if (!clauseOp)
      continue;

    // Only accept clauses that guarantee that the alias is present.
    if (!isa<acc::CopyinOp, acc::CreateOp, acc::PresentOp, acc::NoCreateOp,
             acc::DevicePtrOp>(clauseOp))
      continue;

    if (aliasAnalysis.alias(acc::getVar(clauseOp), var).isMust())
      return clauseOp;
  }
  return nullptr;
}

// Attaches bounds to `dataClauseOp` when it has none yet and its variable has
// a mappable type with unknown dimensions. The bounds are produced by the
// MappableType interface, with the builder temporarily positioned right
// before the data clause operation. `acc.deviceptr` operations are left
// untouched.
static void fillInBoundsForUnknownDimensions(Operation *dataClauseOp,
                                             OpBuilder &builder) {
  // If bounds are already present, do not overwrite them.
  if (!acc::getBounds(dataClauseOp).empty())
    return;

  // Only types with unknown dimensions need bounds; rely on MappableType being
  // able to extract them from the IR.
  auto var = acc::getVar(dataClauseOp);
  auto mappableTy = dyn_cast<acc::MappableType>(var.getType());
  if (!mappableTy || !mappableTy.hasUnknownDimensions())
    return;

  TypeSwitch<Operation *>(dataClauseOp)
      .Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>([&](auto typedClauseOp) {
        // The operation kind is known statically in each instantiation.
        if constexpr (!std::is_same_v<decltype(typedClauseOp),
                                      acc::DevicePtrOp>) {
          OpBuilder::InsertionGuard guard(builder);
          builder.setInsertionPoint(typedClauseOp);
          auto bounds = mappableTy.generateAccBounds(var, builder);
          if (!bounds.empty())
            typedClauseOp.getBoundsMutable().assign(bounds);
        }
      });
}

/// Returns the private recipe for `var`'s type. The recipe name comes from
/// `accSupport`; a recipe with that name already present in `module` is
/// reused, otherwise one is created at the start of the module body. When
/// creation is not possible a not-yet-implemented diagnostic is emitted and a
/// null op is returned.
acc::PrivateRecipeOp
ACCImplicitData::generatePrivateRecipe(ModuleOp &module, Value var,
                                       Location loc, OpBuilder &builder,
                                       acc::OpenACCSupport &accSupport) {
  Type varTy = var.getType();
  std::string recipeName =
      accSupport.getRecipeName(acc::RecipeKind::private_recipe, varTy, var);

  // Reuse the recipe if the module already has one under this name.
  if (auto cachedRecipe = module.lookupSymbol<acc::PrivateRecipeOp>(recipeName))
    return cachedRecipe;

  // Recipes live at module scope; the guard restores the caller's insertion
  // point on return.
  OpBuilder::InsertionGuard guard(builder);
  builder.setInsertionPointToStart(module.getBody());

  auto createdRecipe =
      acc::PrivateRecipeOp::createAndPopulate(builder, loc, recipeName, varTy);
  if (createdRecipe.has_value())
    return createdRecipe.value();

  accSupport.emitNYI(loc, "implicit private");
  return nullptr;
}

/// Returns the firstprivate recipe for `var`'s type. The recipe name comes
/// from `accSupport`; a recipe with that name already present in `module` is
/// reused, otherwise one is created at the start of the module body. When
/// creation is not possible a not-yet-implemented diagnostic is emitted and a
/// null op is returned.
acc::FirstprivateRecipeOp
ACCImplicitData::generateFirstprivateRecipe(ModuleOp &module, Value var,
                                            Location loc, OpBuilder &builder,
                                            acc::OpenACCSupport &accSupport) {
  Type varTy = var.getType();
  std::string recipeName = accSupport.getRecipeName(
      acc::RecipeKind::firstprivate_recipe, varTy, var);

  // Reuse the recipe if the module already has one under this name.
  if (auto cachedRecipe =
          module.lookupSymbol<acc::FirstprivateRecipeOp>(recipeName))
    return cachedRecipe;

  // Recipes live at module scope; the guard restores the caller's insertion
  // point on return.
  OpBuilder::InsertionGuard guard(builder);
  builder.setInsertionPointToStart(module.getBody());

  auto createdRecipe = acc::FirstprivateRecipeOp::createAndPopulate(
      builder, loc, recipeName, varTy);
  if (createdRecipe.has_value())
    return createdRecipe.value();

  accSupport.emitNYI(loc, "implicit firstprivate");
  return nullptr;
}

/// Attaches a recipe symbol to every privatization operation that defines one
/// of `newOperands`. `acc.private` results get a private recipe and
/// `acc.firstprivate` results get a firstprivate recipe; any other operand is
/// reported as not-yet-implemented. If a recipe cannot be generated the
/// operation is left without a recipe attribute.
void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder,
                                      Operation *computeConstructOp,
                                      const SmallVector<Value> &newOperands) {
  auto &accSupport = this->getAnalysis<acc::OpenACCSupport>();
  MLIRContext *context = module->getContext();

  for (Value operand : newOperands) {
    Location loc = operand.getLoc();

    if (auto privateOp = operand.getDefiningOp<acc::PrivateOp>()) {
      Value privatizedVar = acc::getVar(privateOp.getOperation());
      if (auto recipe = generatePrivateRecipe(module, privatizedVar, loc,
                                              builder, accSupport))
        privateOp.setRecipeAttr(
            SymbolRefAttr::get(context, recipe.getSymName()));
      continue;
    }

    if (auto firstprivateOp = operand.getDefiningOp<acc::FirstprivateOp>()) {
      Value privatizedVar = acc::getVar(firstprivateOp.getOperation());
      if (auto recipe = generateFirstprivateRecipe(module, privatizedVar, loc,
                                                   builder, accSupport))
        firstprivateOp.setRecipeAttr(
            SymbolRefAttr::get(context, recipe.getSymName()));
      continue;
    }

    // Only private and firstprivate recipes are generated here.
    accSupport.emitNYI(loc, "implicit reduction");
  }
}

// Creates the implicit data entry operation for `var` at the builder's
// current insertion point, following the OpenACC rules below (line numbers
// and specification from OpenACC 3.4):
// 1388 An aggregate variable will be treated as if it appears either:
// 1389 - In a present clause if there is a default(present) clause visible at
// the compute construct.
// 1391 - In a copy clause otherwise.
// 1392 A scalar variable will be treated as if it appears either:
// 1393 - In a copy clause if the compute construct is a kernels construct.
// 1394 - In a firstprivate clause otherwise.
//
// Two cases take precedence over those rules:
// - `var` must-aliases the variable of a dominating data clause: a no_create,
//   deviceptr or present operation is created, reusing that clause's bounds.
// - `var` is device data: a deviceptr operation is created.
//
// Returns the created operation, or nullptr when the variable is neither a
// scalar nor an aggregate (nothing is created in that case).
template <typename OpT>
Operation *ACCImplicitData::generateDataClauseOpForCandidate(
    Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp,
    const SmallVector<Value> &dominatingDataClauses,
    const std::optional<acc::ClauseDefaultValue> &defaultClause) {
  auto &accSupport = this->getAnalysis<acc::OpenACCSupport>();

  // Classify the variable through whichever type interface it implements.
  acc::VariableTypeCategory category = acc::VariableTypeCategory::uncategorized;
  Type varTy = var.getType();
  if (auto mappableTy = dyn_cast<acc::MappableType>(varTy))
    category = mappableTy.getTypeCategory(var);
  else if (auto pointerLikeTy = dyn_cast<acc::PointerLikeType>(varTy))
    category = pointerLikeTy.getPointeeTypeCategory(
        cast<TypedValue<acc::PointerLikeType>>(var),
        pointerLikeTy.getElementType());

  const bool isScalar =
      acc::bitEnumContainsAny(category, acc::VariableTypeCategory::scalar);
  const bool isAnyAggregate =
      acc::bitEnumContainsAny(category, acc::VariableTypeCategory::aggregate);
  Location loc = computeConstructOp->getLoc();

  // The variable name is looked up only on the paths that create an operation.
  auto varName = [&] { return accSupport.getVariableName(var); };

  // Creates an implicit structured copyin tagged with the given data clause.
  auto createImplicitCopyin = [&](acc::DataClause clause) -> Operation * {
    auto copyinOp =
        acc::CopyinOp::create(builder, loc, var,
                              /*structured=*/true, /*implicit=*/true,
                              varName());
    copyinOp.setDataClause(clause);
    return copyinOp.getOperation();
  };

  // An alias of an already-mapped variable mirrors the original clause kind.
  if (Operation *aliasedClauseOp = getOriginalDataClauseOpForAlias(
          var, builder, computeConstructOp, dominatingDataClauses)) {
    if (isa<acc::NoCreateOp>(aliasedClauseOp))
      return acc::NoCreateOp::create(builder, loc, var,
                                     /*structured=*/true, /*implicit=*/true,
                                     varName(),
                                     acc::getBounds(aliasedClauseOp));

    if (isa<acc::DevicePtrOp>(aliasedClauseOp))
      return acc::DevicePtrOp::create(builder, loc, var,
                                      /*structured=*/true, /*implicit=*/true,
                                      varName(),
                                      acc::getBounds(aliasedClauseOp));

    // The original data clause op is a PresentOp, CopyinOp, or CreateOp,
    // hence guaranteed to be present.
    return acc::PresentOp::create(builder, loc, var,
                                  /*structured=*/true, /*implicit=*/true,
                                  varName(), acc::getBounds(aliasedClauseOp));
  }

  // If the variable is device data, use deviceptr clause.
  if (acc::isDeviceValue(var))
    return acc::DevicePtrOp::create(builder, loc, var,
                                    /*structured=*/true, /*implicit=*/true,
                                    varName());

  if (isScalar) {
    // Scalars only referenced by reduction clauses are copied in and tagged
    // as reductions when the pass option asks for it.
    if (enableImplicitReductionCopy &&
        acc::isOnlyUsedByReductionClauses(var,
                                          computeConstructOp->getRegion(0)))
      return createImplicitCopyin(acc::DataClause::acc_reduction);

    if constexpr (std::is_same_v<OpT, acc::KernelsOp> ||
                  std::is_same_v<OpT, acc::KernelEnvironmentOp>) {
      // Scalars are implicit copyin in kernels construct.
      // We also do the same for acc.kernel_environment because semantics
      // of user variable mappings should be applied while ACC construct exists
      // and at this point we should only be dealing with unmapped variables
      // that were made live-in by the compiler.
      // TODO: This may be revisited.
      return createImplicitCopyin(acc::DataClause::acc_copy);
    } else {
      // Scalars are implicit firstprivate in parallel and serial construct.
      return acc::FirstprivateOp::create(builder, loc, var,
                                         /*structured=*/true, /*implicit=*/true,
                                         varName());
    }
  }

  if (isAnyAggregate) {
    const bool defaultIsPresent =
        defaultClause.has_value() &&
        defaultClause.value() == acc::ClauseDefaultValue::Present;
    if (!defaultIsPresent)
      return createImplicitCopyin(acc::DataClause::acc_copy);

    // When default(present) is true, the implicit behavior is present. Record
    // on the op that it originates from the default clause.
    Operation *presentOp =
        acc::PresentOp::create(builder, loc, var,
                               /*structured=*/true, /*implicit=*/true,
                               varName());
    presentOp->setAttr(acc::getFromDefaultClauseAttrName(),
                       builder.getUnitAttr());
    return presentOp;
  }

  // This is not a fatal error - for example when the element type is
  // pointer type (aka we have a pointer of pointer), it is potentially a
  // deep copy scenario which is not being handled here.
  // Other types need to be canonicalized. Thus just log unhandled cases.
  LLVM_DEBUG(llvm::dbgs()
             << "Unhandled case for implicit data mapping " << var << "\n");
  return nullptr;
}

// Rewrites uses inside the acc region so that they reference the results of
// the newly created data clause operations instead of the original values:
// acc.kernels {
//   use %val
// }
// =>
// %dev = acc.dataop %val
// acc.kernels {
//   use %dev
// }
// Data clause operands are processed first, then the private operands.
static void legalizeValuesInRegion(Region &accRegion,
                                   SmallVector<Value> &newPrivateOperands,
                                   SmallVector<Value> &newDataClauseOperands) {
  // Redirects in-region uses of the clause's variable to the clause result.
  auto redirectUses = [&accRegion](Value clauseResult) {
    Value originalVar = acc::getVar(clauseResult.getDefiningOp());
    replaceAllUsesInRegionWith(originalVar, clauseResult, accRegion);
  };

  for (Value clauseResult : newDataClauseOperands)
    redirectUses(clauseResult);
  for (Value clauseResult : newPrivateOperands)
    redirectUses(clauseResult);
}

// Adds the private operands to the compute construct operation.
template <typename OpT>
static void addNewPrivateOperands(OpT &accOp,
                                  const SmallVector<Value> &privateOperands) {
  if (privateOperands.empty())
    return;

  for (auto priv : privateOperands) {
    if (isa<acc::PrivateOp>(priv.getDefiningOp())) {
      accOp.getPrivateOperandsMutable().append(priv);
    } else if (isa<acc::FirstprivateOp>(priv.getDefiningOp())) {
      accOp.getFirstprivateOperandsMutable().append(priv);
    } else {
      llvm_unreachable("unhandled reduction operand");
    }
  }
}

// Returns the first user of `dataEntryOp`'s acc variable that is a data exit
// operation, or nullptr when there is none.
static Operation *findDataExitOp(Operation *dataEntryOp) {
  auto users = acc::getAccVar(dataEntryOp).getUsers();
  auto exitIt = llvm::find_if(
      users, [](Operation *user) { return isa<ACC_DATA_EXIT_OPS>(user); });
  return exitIt == users.end() ? nullptr : *exitIt;
}

// Generates the data exit operation matching each new data entry operation,
// as described in the acc dialect for how data clauses are decomposed:
// https://mlir.llvm.org/docs/Dialects/OpenACCDialect/#operation-categories
// Key ones used here:
// * acc {construct} copy -> acc.copyin (before region) + acc.copyout (after
// region)
// * acc {construct} present -> acc.present (before region) + acc.delete
// (after region)
//
// The entries are visited in reverse of `sortedDataClauseOperands`. Exit
// operations are inserted after `accOp`, or after the exit operation of the
// previously visited entry when that entry has one. Only entries listed in
// `newDataClauseOperands` get an exit operation; `acc.deviceptr` entries need
// none.
static void
generateDataExitOperations(OpBuilder &builder, Operation *accOp,
                           const SmallVector<Value> &newDataClauseOperands,
                           const SmallVector<Value> &sortedDataClauseOperands) {
  builder.setInsertionPointAfter(accOp);

  Value previousEntry;
  for (Value entry : llvm::reverse(sortedDataClauseOperands)) {
    // Pre-existing data clause operands already have their exit operations;
    // they only serve as positioning anchors for the ones generated here.
    if (llvm::is_contained(newDataClauseOperands, entry)) {
      if (previousEntry) {
        Operation *previousExit = findDataExitOp(previousEntry.getDefiningOp());
        if (previousExit)
          builder.setInsertionPointAfter(previousExit);
      }

      Operation *entryOp = entry.getDefiningOp();
      if (isa<acc::DevicePtrOp>(entryOp)) {
        // Do nothing.
      } else if (isa<acc::CopyinOp>(entryOp)) {
        // NOTE(review): the copyout is always tagged acc_copy, although this
        // pass can also create implicit copyin operations tagged
        // acc_reduction - confirm this is intended.
        auto copyoutOp = acc::CopyoutOp::create(
            builder, entryOp->getLoc(), entry, acc::getVar(entryOp),
            /*structured=*/true, /*implicit=*/true,
            acc::getVarName(entryOp).value(), acc::getBounds(entryOp));
        copyoutOp.setDataClause(acc::DataClause::acc_copy);
      } else if (isa<acc::PresentOp, acc::NoCreateOp>(entryOp)) {
        // The delete operation carries the data clause of its entry operation.
        auto deleteOp = acc::DeleteOp::create(
            builder, entryOp->getLoc(), entry,
            /*structured=*/true, /*implicit=*/true,
            acc::getVarName(entryOp).value(), acc::getBounds(entryOp));
        deleteOp.setDataClause(acc::getDataClause(entryOp).value());
      } else {
        llvm_unreachable("unhandled data exit");
      }
    }
    previousEntry = entry;
  }
}

/// Returns all base references of a value, outermost base first.
/// So for example, if we have a reference to a struct field like
/// s.f1.f2.f3, this will return <s, s.f1, s.f1.f2, s.f1.f2.f3>.
/// Sources of intermediate casts/view-like operations are included in the
/// chain as well. The input value itself is always the last element.
static SmallVector<Value> getBaseRefsChain(Value val) {
  SmallVector<Value> chain = {val};

  Value current = val;
  Value previous;
  do {
    previous = current;

    // Step to the base entity and record it unless it is already at the
    // front of the chain.
    current = acc::getBaseEntity(current);
    if (current != chain.front())
      chain.insert(chain.begin(), current);

    // If this is a view-like operation, it is effectively another
    // view of the same entity so we should add it to the chain also.
    if (auto viewLikeOp = current.getDefiningOp<ViewLikeOpInterface>()) {
      current = viewLikeOp.getViewSource();
      chain.insert(chain.begin(), current);
    }

    // Keep going only while an iteration moved to a different value.
  } while (current != previous);

  return chain;
}

/// Inserts the acc variable of `newClause` into `sortedDataClauseOperands` so
/// that a clause for a base entity precedes clauses for entities derived from
/// it. If the variable of `newClause` appears in the base reference chain of
/// an existing clause, `newClause` is moved in the IR before that clause's
/// operation and its result is inserted at that position in the list;
/// otherwise the result is appended and the operation stays where it is.
static void insertInSortedOrder(SmallVector<Value> &sortedDataClauseOperands,
                                Operation *newClause) {
  // True when the existing clause maps something whose base reference chain
  // contains the variable of the new clause.
  auto isDerivedFromNewClauseVar = [&](Value existingClause) {
    auto existingVar = acc::getVar(existingClause.getDefiningOp());
    SmallVector<Value> baseRefs = getBaseRefsChain(existingVar);
    return llvm::is_contained(baseRefs, acc::getVar(newClause));
  };

  auto *insertPos =
      llvm::find_if(sortedDataClauseOperands, isDerivedFromNewClauseVar);
  if (insertPos == sortedDataClauseOperands.end()) {
    sortedDataClauseOperands.push_back(acc::getAccVar(newClause));
    return;
  }

  // The new clause must come right before the first clause that depends on it.
  newClause->moveBefore(insertPos->getDefiningOp());
  sortedDataClauseOperands.insert(insertPos, acc::getAccVar(newClause));
}

/// Generates the implicit data clause operations for one compute construct.
///
/// Live-in values of the construct's region that qualify as candidates get a
/// data entry operation before the construct, a matching data exit operation
/// after it where applicable, and have their uses inside the region redirected
/// to the result of the entry operation. New privatization operands
/// additionally get recipes attached (not for `acc.kernels` and
/// `acc.kernel_environment`). Nothing is generated when `defaultClause` is
/// `default(none)`.
///
/// \param module the module that receives generated recipes.
/// \param computeConstructOp the construct being processed.
/// \param defaultClause the default clause passed in by the caller, if any.
/// \param accSupport analysis used to filter candidate values.
template <typename OpT>
void ACCImplicitData::generateImplicitDataOps(
    ModuleOp &module, OpT computeConstructOp,
    std::optional<acc::ClauseDefaultValue> &defaultClause,
    acc::OpenACCSupport &accSupport) {
  // Implicit data attributes are only applied if "[t]here is no default(none)
  // clause visible at the compute construct."
  if (defaultClause.has_value() &&
      defaultClause.value() == acc::ClauseDefaultValue::None)
    return;
  assert(!defaultClause.has_value() ||
         defaultClause.value() == acc::ClauseDefaultValue::Present);

  // 1) Collect live-in values.
  Region &accRegion = computeConstructOp->getRegion(0);
  SetVector<Value> liveInValues;
  getUsedValuesDefinedAbove(accRegion, liveInValues);

  // 2) Run the filtering to find relevant variables that need to be mapped.
  auto isCandidate{[&](Value val) -> bool {
    return isCandidateForImplicitData(val, accRegion, accSupport);
  }};
  auto candidateVars(llvm::filter_to_vector(liveInValues, isCandidate));
  if (candidateVars.empty())
    return;

  // 3) Generate data clauses for the variables.
  SmallVector<Value> newPrivateOperands;
  SmallVector<Value> newDataClauseOperands;
  OpBuilder builder(computeConstructOp);
  LLVM_DEBUG(llvm::dbgs() << "== Generating clauses for ==\n"
                          << computeConstructOp << "\n");
  auto &domInfo = this->getAnalysis<DominanceInfo>();
  auto &postDomInfo = this->getAnalysis<PostDominanceInfo>();
  auto dominatingDataClauses =
      acc::getDominatingDataClauses(computeConstructOp, domInfo, postDomInfo);
  for (auto var : candidateVars) {
    Operation *newDataClauseOp = generateDataClauseOpForCandidate(
        var, module, builder, computeConstructOp, dominatingDataClauses,
        defaultClause);
    // No operation is created for variables that none of the implicit data
    // rules covers (this has already been logged); skip them rather than
    // handing a null operation to the helpers below.
    if (!newDataClauseOp)
      continue;
    fillInBoundsForUnknownDimensions(newDataClauseOp, builder);
    LLVM_DEBUG(llvm::dbgs() << "Generated data clause for " << var << ":\n"
                            << "\t" << *newDataClauseOp << "\n");
    if (isa<acc::PrivateOp, acc::FirstprivateOp, acc::ReductionOp>(
            newDataClauseOp)) {
      newPrivateOperands.push_back(acc::getAccVar(newDataClauseOp));
    } else if (isa<ACC_DATA_CLAUSE_OPS>(newDataClauseOp)) {
      newDataClauseOperands.push_back(acc::getAccVar(newDataClauseOp));
      // Later candidates may alias this variable, so make the new clause
      // visible to the alias lookup as well.
      dominatingDataClauses.push_back(acc::getAccVar(newDataClauseOp));
    }
  }

  // 4) Legalize values in region (aka the uses in the region are the result
  // of the data clause ops)
  legalizeValuesInRegion(accRegion, newPrivateOperands, newDataClauseOperands);

  // 5) Generate private recipes which are required for properly attaching
  // private operands.
  if constexpr (!std::is_same_v<OpT, acc::KernelsOp> &&
                !std::is_same_v<OpT, acc::KernelEnvironmentOp>)
    generateRecipes(module, builder, computeConstructOp, newPrivateOperands);

  // 6) Figure out insertion order for the new data clause operands.
  SmallVector<Value> sortedDataClauseOperands(
      computeConstructOp.getDataClauseOperands());
  for (auto newClause : newDataClauseOperands)
    insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp());

  // 7) Generate the data exit operations.
  generateDataExitOperations(builder, computeConstructOp, newDataClauseOperands,
                             sortedDataClauseOperands);
  // 8) Add all of the new operands to the compute construct op.
  if constexpr (!std::is_same_v<OpT, acc::KernelsOp> &&
                !std::is_same_v<OpT, acc::KernelEnvironmentOp>)
    addNewPrivateOperands(computeConstructOp, newPrivateOperands);
  computeConstructOp.getDataClauseOperandsMutable().assign(
      sortedDataClauseOperands);
}

/// Pass entry point. Walks the module and generates implicit data operations
/// for every OpenACC compute construct and `acc.kernel_environment`
/// operation, using the default clause attribute found on that operation.
void ACCImplicitData::runOnOperation() {
  ModuleOp module = this->getOperation();
  acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();

  module.walk([&](Operation *op) {
    // Everything that is not a compute construct is left untouched.
    if (!isa<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(op))
      return;
    assert(op->getNumRegions() == 1 && "must have 1 region");

    auto defaultClause = acc::getDefaultAttr(op);
    // Dispatch on the concrete construct type so that the templated
    // implementation is instantiated per construct kind.
    llvm::TypeSwitch<Operation *, void>(op)
        .Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
            [&](auto constructOp) {
              generateImplicitDataOps(module, constructOp, defaultClause,
                                      accSupport);
            })
        .Default([&](Operation *) {});
  });
}

} // namespace
