Bugzilla – Attachment 145225 Details for Bug 111481
AMD Navi GPU frequent freezes on both Manjaro/Ubuntu with kernel 5.3 and mesa 19.2-git/llvm9
[patch] Merge last agd5f code

Description: Merge last agd5f code
Filename:    merge_last_amdgpu-for-5.3-rc6.patch
MIME Type:   text/plain
Creator:     Mathieu Belanger
Created:     2019-08-31 22:15:36 UTC
Size:        22.82 MB
Flags:       patch, obsolete
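For anyone wanting to test this, a minimal sketch of applying the attachment to a vanilla tree (assuming the patch was saved under the filename above, one directory up from the source tree; adjust paths to your setup):

    # The diff was taken between linux-5.3-rc6/ and linux-5.3-rc6-agd5fed/,
    # so it applies with -p1 from the top of the source tree.
    cd linux-5.3-rc6
    patch -p1 --dry-run < ../merge_last_amdgpu-for-5.3-rc6.patch   # verify it applies cleanly first
    patch -p1 < ../merge_last_amdgpu-for-5.3-rc6.patch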
>diff -Naur linux-5.3-rc6/arch/x86/kernel/early-quirks.c linux-5.3-rc6-agd5fed/arch/x86/kernel/early-quirks.c >--- linux-5.3-rc6/arch/x86/kernel/early-quirks.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/arch/x86/kernel/early-quirks.c 2019-08-31 15:01:11.825736165 -0500 >@@ -549,6 +549,7 @@ > INTEL_CNL_IDS(&gen9_early_ops), > INTEL_ICL_11_IDS(&gen11_early_ops), > INTEL_EHL_IDS(&gen11_early_ops), >+ INTEL_TGL_12_IDS(&gen11_early_ops), > }; > > struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); >diff -Naur linux-5.3-rc6/.clang-format linux-5.3-rc6-agd5fed/.clang-format >--- linux-5.3-rc6/.clang-format 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/.clang-format 1969-12-31 18:00:00.000000000 -0600 >@@ -1,493 +0,0 @@ >-# SPDX-License-Identifier: GPL-2.0 >-# >-# clang-format configuration file. Intended for clang-format >= 4. >-# >-# For more information, see: >-# >-# Documentation/process/clang-format.rst >-# https://clang.llvm.org/docs/ClangFormat.html >-# https://clang.llvm.org/docs/ClangFormatStyleOptions.html >-# >---- >-AccessModifierOffset: -4 >-AlignAfterOpenBracket: Align >-AlignConsecutiveAssignments: false >-AlignConsecutiveDeclarations: false >-#AlignEscapedNewlines: Left # Unknown to clang-format-4.0 >-AlignOperands: true >-AlignTrailingComments: false >-AllowAllParametersOfDeclarationOnNextLine: false >-AllowShortBlocksOnASingleLine: false >-AllowShortCaseLabelsOnASingleLine: false >-AllowShortFunctionsOnASingleLine: None >-AllowShortIfStatementsOnASingleLine: false >-AllowShortLoopsOnASingleLine: false >-AlwaysBreakAfterDefinitionReturnType: None >-AlwaysBreakAfterReturnType: None >-AlwaysBreakBeforeMultilineStrings: false >-AlwaysBreakTemplateDeclarations: false >-BinPackArguments: true >-BinPackParameters: true >-BraceWrapping: >- AfterClass: false >- AfterControlStatement: false >- AfterEnum: false >- AfterFunction: true >- AfterNamespace: true >- AfterObjCDeclaration: false >- AfterStruct: false >- AfterUnion: false >- #AfterExternBlock: false # Unknown to clang-format-5.0 >- BeforeCatch: false >- BeforeElse: false >- IndentBraces: false >- #SplitEmptyFunction: true # Unknown to clang-format-4.0 >- #SplitEmptyRecord: true # Unknown to clang-format-4.0 >- #SplitEmptyNamespace: true # Unknown to clang-format-4.0 >-BreakBeforeBinaryOperators: None >-BreakBeforeBraces: Custom >-#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0 >-BreakBeforeTernaryOperators: false >-BreakConstructorInitializersBeforeComma: false >-#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0 >-BreakAfterJavaFieldAnnotations: false >-BreakStringLiterals: false >-ColumnLimit: 80 >-CommentPragmas: '^ IWYU pragma:' >-#CompactNamespaces: false # Unknown to clang-format-4.0 >-ConstructorInitializerAllOnOneLineOrOnePerLine: false >-ConstructorInitializerIndentWidth: 8 >-ContinuationIndentWidth: 8 >-Cpp11BracedListStyle: false >-DerivePointerAlignment: false >-DisableFormat: false >-ExperimentalAutoDetectBinPacking: false >-#FixNamespaceComments: false # Unknown to clang-format-4.0 >- >-# Taken from: >-# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \ >-# | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \ >-# | sort | uniq >-ForEachMacros: >- - 'apei_estatus_for_each_section' >- - 'ata_for_each_dev' >- - 'ata_for_each_link' >- - '__ata_qc_for_each' >- - 'ata_qc_for_each' >- - 'ata_qc_for_each_raw' >- - 'ata_qc_for_each_with_internal' >- - 'ax25_for_each' >- - 
'ax25_uid_for_each' >- - '__bio_for_each_bvec' >- - 'bio_for_each_bvec' >- - 'bio_for_each_integrity_vec' >- - '__bio_for_each_segment' >- - 'bio_for_each_segment' >- - 'bio_for_each_segment_all' >- - 'bio_list_for_each' >- - 'bip_for_each_vec' >- - 'blkg_for_each_descendant_post' >- - 'blkg_for_each_descendant_pre' >- - 'blk_queue_for_each_rl' >- - 'bond_for_each_slave' >- - 'bond_for_each_slave_rcu' >- - 'bpf_for_each_spilled_reg' >- - 'btree_for_each_safe128' >- - 'btree_for_each_safe32' >- - 'btree_for_each_safe64' >- - 'btree_for_each_safel' >- - 'card_for_each_dev' >- - 'cgroup_taskset_for_each' >- - 'cgroup_taskset_for_each_leader' >- - 'cpufreq_for_each_entry' >- - 'cpufreq_for_each_entry_idx' >- - 'cpufreq_for_each_valid_entry' >- - 'cpufreq_for_each_valid_entry_idx' >- - 'css_for_each_child' >- - 'css_for_each_descendant_post' >- - 'css_for_each_descendant_pre' >- - 'device_for_each_child_node' >- - 'drm_atomic_crtc_for_each_plane' >- - 'drm_atomic_crtc_state_for_each_plane' >- - 'drm_atomic_crtc_state_for_each_plane_state' >- - 'drm_atomic_for_each_plane_damage' >- - 'drm_connector_for_each_possible_encoder' >- - 'drm_for_each_connector_iter' >- - 'drm_for_each_crtc' >- - 'drm_for_each_encoder' >- - 'drm_for_each_encoder_mask' >- - 'drm_for_each_fb' >- - 'drm_for_each_legacy_plane' >- - 'drm_for_each_plane' >- - 'drm_for_each_plane_mask' >- - 'drm_for_each_privobj' >- - 'drm_mm_for_each_hole' >- - 'drm_mm_for_each_node' >- - 'drm_mm_for_each_node_in_range' >- - 'drm_mm_for_each_node_safe' >- - 'flow_action_for_each' >- - 'for_each_active_drhd_unit' >- - 'for_each_active_iommu' >- - 'for_each_available_child_of_node' >- - 'for_each_bio' >- - 'for_each_board_func_rsrc' >- - 'for_each_bvec' >- - 'for_each_card_components' >- - 'for_each_card_links' >- - 'for_each_card_links_safe' >- - 'for_each_card_prelinks' >- - 'for_each_card_rtds' >- - 'for_each_card_rtds_safe' >- - 'for_each_cgroup_storage_type' >- - 'for_each_child_of_node' >- - 'for_each_clear_bit' >- - 'for_each_clear_bit_from' >- - 'for_each_cmsghdr' >- - 'for_each_compatible_node' >- - 'for_each_component_dais' >- - 'for_each_component_dais_safe' >- - 'for_each_comp_order' >- - 'for_each_console' >- - 'for_each_cpu' >- - 'for_each_cpu_and' >- - 'for_each_cpu_not' >- - 'for_each_cpu_wrap' >- - 'for_each_dev_addr' >- - 'for_each_dma_cap_mask' >- - 'for_each_dpcm_be' >- - 'for_each_dpcm_be_rollback' >- - 'for_each_dpcm_be_safe' >- - 'for_each_dpcm_fe' >- - 'for_each_drhd_unit' >- - 'for_each_dss_dev' >- - 'for_each_efi_memory_desc' >- - 'for_each_efi_memory_desc_in_map' >- - 'for_each_element' >- - 'for_each_element_extid' >- - 'for_each_element_id' >- - 'for_each_endpoint_of_node' >- - 'for_each_evictable_lru' >- - 'for_each_fib6_node_rt_rcu' >- - 'for_each_fib6_walker_rt' >- - 'for_each_free_mem_range' >- - 'for_each_free_mem_range_reverse' >- - 'for_each_func_rsrc' >- - 'for_each_hstate' >- - 'for_each_if' >- - 'for_each_iommu' >- - 'for_each_ip_tunnel_rcu' >- - 'for_each_irq_nr' >- - 'for_each_link_codecs' >- - 'for_each_lru' >- - 'for_each_matching_node' >- - 'for_each_matching_node_and_match' >- - 'for_each_memblock' >- - 'for_each_memblock_type' >- - 'for_each_memcg_cache_index' >- - 'for_each_mem_pfn_range' >- - 'for_each_mem_range' >- - 'for_each_mem_range_rev' >- - 'for_each_migratetype_order' >- - 'for_each_msi_entry' >- - 'for_each_msi_entry_safe' >- - 'for_each_net' >- - 'for_each_netdev' >- - 'for_each_netdev_continue' >- - 'for_each_netdev_continue_rcu' >- - 'for_each_netdev_feature' >- - 
'for_each_netdev_in_bond_rcu' >- - 'for_each_netdev_rcu' >- - 'for_each_netdev_reverse' >- - 'for_each_netdev_safe' >- - 'for_each_net_rcu' >- - 'for_each_new_connector_in_state' >- - 'for_each_new_crtc_in_state' >- - 'for_each_new_mst_mgr_in_state' >- - 'for_each_new_plane_in_state' >- - 'for_each_new_private_obj_in_state' >- - 'for_each_node' >- - 'for_each_node_by_name' >- - 'for_each_node_by_type' >- - 'for_each_node_mask' >- - 'for_each_node_state' >- - 'for_each_node_with_cpus' >- - 'for_each_node_with_property' >- - 'for_each_of_allnodes' >- - 'for_each_of_allnodes_from' >- - 'for_each_of_cpu_node' >- - 'for_each_of_pci_range' >- - 'for_each_old_connector_in_state' >- - 'for_each_old_crtc_in_state' >- - 'for_each_old_mst_mgr_in_state' >- - 'for_each_oldnew_connector_in_state' >- - 'for_each_oldnew_crtc_in_state' >- - 'for_each_oldnew_mst_mgr_in_state' >- - 'for_each_oldnew_plane_in_state' >- - 'for_each_oldnew_plane_in_state_reverse' >- - 'for_each_oldnew_private_obj_in_state' >- - 'for_each_old_plane_in_state' >- - 'for_each_old_private_obj_in_state' >- - 'for_each_online_cpu' >- - 'for_each_online_node' >- - 'for_each_online_pgdat' >- - 'for_each_pci_bridge' >- - 'for_each_pci_dev' >- - 'for_each_pci_msi_entry' >- - 'for_each_populated_zone' >- - 'for_each_possible_cpu' >- - 'for_each_present_cpu' >- - 'for_each_prime_number' >- - 'for_each_prime_number_from' >- - 'for_each_process' >- - 'for_each_process_thread' >- - 'for_each_property_of_node' >- - 'for_each_registered_fb' >- - 'for_each_reserved_mem_region' >- - 'for_each_rtd_codec_dai' >- - 'for_each_rtd_codec_dai_rollback' >- - 'for_each_rtdcom' >- - 'for_each_rtdcom_safe' >- - 'for_each_set_bit' >- - 'for_each_set_bit_from' >- - 'for_each_sg' >- - 'for_each_sg_dma_page' >- - 'for_each_sg_page' >- - 'for_each_sibling_event' >- - 'for_each_subelement' >- - 'for_each_subelement_extid' >- - 'for_each_subelement_id' >- - '__for_each_thread' >- - 'for_each_thread' >- - 'for_each_zone' >- - 'for_each_zone_zonelist' >- - 'for_each_zone_zonelist_nodemask' >- - 'fwnode_for_each_available_child_node' >- - 'fwnode_for_each_child_node' >- - 'fwnode_graph_for_each_endpoint' >- - 'gadget_for_each_ep' >- - 'genradix_for_each' >- - 'genradix_for_each_from' >- - 'hash_for_each' >- - 'hash_for_each_possible' >- - 'hash_for_each_possible_rcu' >- - 'hash_for_each_possible_rcu_notrace' >- - 'hash_for_each_possible_safe' >- - 'hash_for_each_rcu' >- - 'hash_for_each_safe' >- - 'hctx_for_each_ctx' >- - 'hlist_bl_for_each_entry' >- - 'hlist_bl_for_each_entry_rcu' >- - 'hlist_bl_for_each_entry_safe' >- - 'hlist_for_each' >- - 'hlist_for_each_entry' >- - 'hlist_for_each_entry_continue' >- - 'hlist_for_each_entry_continue_rcu' >- - 'hlist_for_each_entry_continue_rcu_bh' >- - 'hlist_for_each_entry_from' >- - 'hlist_for_each_entry_from_rcu' >- - 'hlist_for_each_entry_rcu' >- - 'hlist_for_each_entry_rcu_bh' >- - 'hlist_for_each_entry_rcu_notrace' >- - 'hlist_for_each_entry_safe' >- - '__hlist_for_each_rcu' >- - 'hlist_for_each_safe' >- - 'hlist_nulls_for_each_entry' >- - 'hlist_nulls_for_each_entry_from' >- - 'hlist_nulls_for_each_entry_rcu' >- - 'hlist_nulls_for_each_entry_safe' >- - 'i3c_bus_for_each_i2cdev' >- - 'i3c_bus_for_each_i3cdev' >- - 'ide_host_for_each_port' >- - 'ide_port_for_each_dev' >- - 'ide_port_for_each_present_dev' >- - 'idr_for_each_entry' >- - 'idr_for_each_entry_continue' >- - 'idr_for_each_entry_ul' >- - 'inet_bind_bucket_for_each' >- - 'inet_lhash2_for_each_icsk_rcu' >- - 'key_for_each' >- - 'key_for_each_safe' >- - 
'klp_for_each_func' >- - 'klp_for_each_func_safe' >- - 'klp_for_each_func_static' >- - 'klp_for_each_object' >- - 'klp_for_each_object_safe' >- - 'klp_for_each_object_static' >- - 'kvm_for_each_memslot' >- - 'kvm_for_each_vcpu' >- - 'list_for_each' >- - 'list_for_each_codec' >- - 'list_for_each_codec_safe' >- - 'list_for_each_entry' >- - 'list_for_each_entry_continue' >- - 'list_for_each_entry_continue_rcu' >- - 'list_for_each_entry_continue_reverse' >- - 'list_for_each_entry_from' >- - 'list_for_each_entry_from_rcu' >- - 'list_for_each_entry_from_reverse' >- - 'list_for_each_entry_lockless' >- - 'list_for_each_entry_rcu' >- - 'list_for_each_entry_reverse' >- - 'list_for_each_entry_safe' >- - 'list_for_each_entry_safe_continue' >- - 'list_for_each_entry_safe_from' >- - 'list_for_each_entry_safe_reverse' >- - 'list_for_each_prev' >- - 'list_for_each_prev_safe' >- - 'list_for_each_safe' >- - 'llist_for_each' >- - 'llist_for_each_entry' >- - 'llist_for_each_entry_safe' >- - 'llist_for_each_safe' >- - 'media_device_for_each_entity' >- - 'media_device_for_each_intf' >- - 'media_device_for_each_link' >- - 'media_device_for_each_pad' >- - 'mp_bvec_for_each_page' >- - 'mp_bvec_for_each_segment' >- - 'nanddev_io_for_each_page' >- - 'netdev_for_each_lower_dev' >- - 'netdev_for_each_lower_private' >- - 'netdev_for_each_lower_private_rcu' >- - 'netdev_for_each_mc_addr' >- - 'netdev_for_each_uc_addr' >- - 'netdev_for_each_upper_dev_rcu' >- - 'netdev_hw_addr_list_for_each' >- - 'nft_rule_for_each_expr' >- - 'nla_for_each_attr' >- - 'nla_for_each_nested' >- - 'nlmsg_for_each_attr' >- - 'nlmsg_for_each_msg' >- - 'nr_neigh_for_each' >- - 'nr_neigh_for_each_safe' >- - 'nr_node_for_each' >- - 'nr_node_for_each_safe' >- - 'of_for_each_phandle' >- - 'of_property_for_each_string' >- - 'of_property_for_each_u32' >- - 'pci_bus_for_each_resource' >- - 'ping_portaddr_for_each_entry' >- - 'plist_for_each' >- - 'plist_for_each_continue' >- - 'plist_for_each_entry' >- - 'plist_for_each_entry_continue' >- - 'plist_for_each_entry_safe' >- - 'plist_for_each_safe' >- - 'pnp_for_each_card' >- - 'pnp_for_each_dev' >- - 'protocol_for_each_card' >- - 'protocol_for_each_dev' >- - 'queue_for_each_hw_ctx' >- - 'radix_tree_for_each_slot' >- - 'radix_tree_for_each_tagged' >- - 'rbtree_postorder_for_each_entry_safe' >- - 'rdma_for_each_port' >- - 'resource_list_for_each_entry' >- - 'resource_list_for_each_entry_safe' >- - 'rhl_for_each_entry_rcu' >- - 'rhl_for_each_rcu' >- - 'rht_for_each' >- - 'rht_for_each_from' >- - 'rht_for_each_entry' >- - 'rht_for_each_entry_from' >- - 'rht_for_each_entry_rcu' >- - 'rht_for_each_entry_rcu_from' >- - 'rht_for_each_entry_safe' >- - 'rht_for_each_rcu' >- - 'rht_for_each_rcu_from' >- - '__rq_for_each_bio' >- - 'rq_for_each_bvec' >- - 'rq_for_each_segment' >- - 'scsi_for_each_prot_sg' >- - 'scsi_for_each_sg' >- - 'sctp_for_each_hentry' >- - 'sctp_skb_for_each' >- - 'shdma_for_each_chan' >- - '__shost_for_each_device' >- - 'shost_for_each_device' >- - 'sk_for_each' >- - 'sk_for_each_bound' >- - 'sk_for_each_entry_offset_rcu' >- - 'sk_for_each_from' >- - 'sk_for_each_rcu' >- - 'sk_for_each_safe' >- - 'sk_nulls_for_each' >- - 'sk_nulls_for_each_from' >- - 'sk_nulls_for_each_rcu' >- - 'snd_array_for_each' >- - 'snd_pcm_group_for_each_entry' >- - 'snd_soc_dapm_widget_for_each_path' >- - 'snd_soc_dapm_widget_for_each_path_safe' >- - 'snd_soc_dapm_widget_for_each_sink_path' >- - 'snd_soc_dapm_widget_for_each_source_path' >- - 'tb_property_for_each' >- - 'tcf_exts_for_each_action' >- - 
'udp_portaddr_for_each_entry' >- - 'udp_portaddr_for_each_entry_rcu' >- - 'usb_hub_for_each_child' >- - 'v4l2_device_for_each_subdev' >- - 'v4l2_m2m_for_each_dst_buf' >- - 'v4l2_m2m_for_each_dst_buf_safe' >- - 'v4l2_m2m_for_each_src_buf' >- - 'v4l2_m2m_for_each_src_buf_safe' >- - 'virtio_device_for_each_vq' >- - 'xa_for_each' >- - 'xa_for_each_marked' >- - 'xa_for_each_start' >- - 'xas_for_each' >- - 'xas_for_each_conflict' >- - 'xas_for_each_marked' >- - 'zorro_for_each_dev' >- >-#IncludeBlocks: Preserve # Unknown to clang-format-5.0 >-IncludeCategories: >- - Regex: '.*' >- Priority: 1 >-IncludeIsMainRegex: '(Test)?$' >-IndentCaseLabels: false >-#IndentPPDirectives: None # Unknown to clang-format-5.0 >-IndentWidth: 8 >-IndentWrappedFunctionNames: false >-JavaScriptQuotes: Leave >-JavaScriptWrapImports: true >-KeepEmptyLinesAtTheStartOfBlocks: false >-MacroBlockBegin: '' >-MacroBlockEnd: '' >-MaxEmptyLinesToKeep: 1 >-NamespaceIndentation: Inner >-#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0 >-ObjCBlockIndentWidth: 8 >-ObjCSpaceAfterProperty: true >-ObjCSpaceBeforeProtocolList: true >- >-# Taken from git's rules >-#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0 >-PenaltyBreakBeforeFirstCallParameter: 30 >-PenaltyBreakComment: 10 >-PenaltyBreakFirstLessLess: 0 >-PenaltyBreakString: 10 >-PenaltyExcessCharacter: 100 >-PenaltyReturnTypeOnItsOwnLine: 60 >- >-PointerAlignment: Right >-ReflowComments: false >-SortIncludes: false >-#SortUsingDeclarations: false # Unknown to clang-format-4.0 >-SpaceAfterCStyleCast: false >-SpaceAfterTemplateKeyword: true >-SpaceBeforeAssignmentOperators: true >-#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0 >-#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0 >-SpaceBeforeParens: ControlStatements >-#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0 >-SpaceInEmptyParentheses: false >-SpacesBeforeTrailingComments: 1 >-SpacesInAngles: false >-SpacesInContainerLiterals: false >-SpacesInCStyleCastParentheses: false >-SpacesInParentheses: false >-SpacesInSquareBrackets: false >-Standard: Cpp03 >-TabWidth: 8 >-UseTab: Always >-... >diff -Naur linux-5.3-rc6/.cocciconfig linux-5.3-rc6-agd5fed/.cocciconfig >--- linux-5.3-rc6/.cocciconfig 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/.cocciconfig 1969-12-31 18:00:00.000000000 -0600 >@@ -1,3 +0,0 @@ >-[spatch] >- options = --timeout 200 >- options = --use-gitgrep >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/arm,pl11x.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/arm,pl11x.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/arm,pl11x.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/arm,pl11x.txt 2019-08-31 15:01:11.823736165 -0500 >@@ -39,9 +39,11 @@ > > - port: describes LCD panel signals, following the common binding > for video transmitter interfaces; see >- Documentation/devicetree/bindings/media/video-interfaces.txt; >- when it is a TFT panel, the port's endpoint must define the >- following property: >+ Documentation/devicetree/bindings/media/video-interfaces.txt >+ >+Deprecated properties: >+ The port's endbpoint subnode had this, now deprecated property >+ in the past. 
Drivers should be able to survive without it: > > - arm,pl11x,tft-r0g0b0-pads: an array of three 32-bit values, > defining the way CLD pads are wired up; first value >@@ -80,7 +82,6 @@ > port { > clcd_pads: endpoint { > remote-endpoint = <&clcd_panel>; >- arm,pl11x,tft-r0g0b0-pads = <0 8 16>; > }; > }; > >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/bridge/sii902x.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/bridge/sii902x.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/bridge/sii902x.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/bridge/sii902x.txt 2019-08-31 15:01:11.823736165 -0500 >@@ -26,9 +26,8 @@ > - clocks: phandle and clock specifier for each clock listed in > the clock-names property > - clock-names: "mclk" >- Describes SII902x MCLK input. MCLK is used to produce >- HDMI audio CTS values. This property is required if >- "#sound-dai-cells"-property is present. This property follows >+ Describes SII902x MCLK input. MCLK can be used to produce >+ HDMI audio CTS values. This property follows > Documentation/devicetree/bindings/clock/clock-bindings.txt > consumer binding. > >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,26 +0,0 @@ >-Ampire AM-480272H3TMQW-T01H 4.3" WQVGA TFT LCD panel >- >-This binding is compatible with the simple-panel binding, which is specified >-in simple-panel.txt in this directory. >- >-Required properties: >-- compatible: should be "ampire,am-480272h3tmqw-t01h" >- >-Optional properties: >-- power-supply: regulator to provide the supply voltage >-- enable-gpios: GPIO pin to enable or disable the panel >-- backlight: phandle of the backlight device attached to the panel >- >-Optional nodes: >-- Video port for RGB input. 
>- >-Example: >- panel_rgb: panel-rgb { >- compatible = "ampire,am-480272h3tmqw-t01h"; >- enable-gpios = <&gpioa 8 1>; >- port { >- panel_in_rgb: endpoint { >- remote-endpoint = <&controller_out_rgb>; >- }; >- }; >- }; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml 2019-08-31 15:01:11.823736165 -0500 >@@ -0,0 +1,42 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/ampire,am-480272h3tmqw-t01h.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: Ampire AM-480272H3TMQW-T01H 4.3" WQVGA TFT LCD panel >+ >+maintainers: >+ - Yannick Fertre <yannick.fertre@st.com> >+ - Thierry Reding <treding@nvidia.com> >+ >+allOf: >+ - $ref: panel-common.yaml# >+ >+properties: >+ compatible: >+ const: ampire,am-480272h3tmqw-t01h >+ >+ power-supply: true >+ enable-gpios: true >+ backlight: true >+ port: true >+ >+required: >+ - compatible >+ >+additionalProperties: false >+ >+examples: >+ - | >+ panel_rgb: panel { >+ compatible = "ampire,am-480272h3tmqw-t01h"; >+ enable-gpios = <&gpioa 8 1>; >+ port { >+ panel_in_rgb: endpoint { >+ remote-endpoint = <&controller_out_rgb>; >+ }; >+ }; >+ }; >+ >+... >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,9 +0,0 @@ >-Armadeus ST0700 Adapt. A Santek ST0700I5Y-RBSLW 7.0" WVGA (800x480) TFT with >-an adapter board. >- >-Required properties: >-- compatible: "armadeus,st0700-adapt" >-- power-supply: see panel-common.txt >- >-Optional properties: >-- backlight: see panel-common.txt >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml 2019-08-31 15:01:11.823736165 -0500 >@@ -0,0 +1,33 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/armadeus,st0700-adapt.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: Armadeus ST0700 Adapter >+ >+description: >+ A Santek ST0700I5Y-RBSLW 7.0" WVGA (800x480) TFT with an adapter board. >+ >+maintainers: >+ - '"Sébastien Szymanski" <sebastien.szymanski@armadeus.com>' >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+allOf: >+ - $ref: panel-common.yaml# >+ >+properties: >+ compatible: >+ const: armadeus,st0700-adapt >+ >+ power-supply: true >+ backlight: true >+ port: true >+ >+additionalProperties: false >+ >+required: >+ - compatible >+ - power-supply >+ >+... 
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt 2019-08-31 15:01:11.823736165 -0500 >@@ -10,7 +10,7 @@ > - compatible: should be "arm,versatile-tft-panel" > > Required subnodes: >-- port: see display/panel/panel-common.txt, graph.txt >+- port: see display/panel/panel-common.yaml, graph.txt > > > Example: >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,12 +0,0 @@ >-Banana Pi 7" (S070WV20-CT16) TFT LCD Panel >- >-Required properties: >-- compatible: should be "bananapi,s070wv20-ct16" >-- power-supply: see ./panel-common.txt >- >-Optional properties: >-- enable-gpios: see ./simple-panel.txt >-- backlight: see ./simple-panel.txt >- >-This binding is compatible with the simple-panel binding, which is specified >-in ./simple-panel.txt. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml 2019-08-31 15:01:11.823736165 -0500 >@@ -0,0 +1,31 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/bananapi,s070wv20-ct16.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: Banana Pi 7" (S070WV20-CT16) TFT LCD Panel >+ >+maintainers: >+ - Chen-Yu Tsai <wens@csie.org> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+allOf: >+ - $ref: panel-common.yaml# >+ >+properties: >+ compatible: >+ const: bananapi,s070wv20-ct16 >+ >+ power-supply: true >+ backlight: true >+ enable-gpios: true >+ port: true >+ >+additionalProperties: false >+ >+required: >+ - compatible >+ - power-supply >+ >+... 
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt 2019-08-31 15:01:11.823736165 -0500 >@@ -0,0 +1,24 @@ >+Boe Himax8279d 1200x1920 TFT LCD panel >+ >+Required properties: >+- compatible: should be "boe,himax8279d8p" and one of: "boe,himax8279d10p" >+- reg: DSI virtual channel of the peripheral >+- enable-gpios: panel enable gpio >+- pp33-gpios: a GPIO phandle for the 3.3v pin that provides the supply voltage >+- pp18-gpios: a GPIO phandle for the 1.8v pin that provides the supply voltage >+ >+Optional properties: >+- backlight: phandle of the backlight device attached to the panel >+ >+Example: >+ >+ &mipi_dsi { >+ panel { >+ compatible = "boe,himax8279d8p", "boe,himax8279d10p"; >+ reg = <0>; >+ backlight = <&backlight>; >+ enable-gpios = <&gpio 45 GPIO_ACTIVE_HIGH>; >+ pp33-gpios = <&gpio 35 GPIO_ACTIVE_HIGH>; >+ pp18-gpios = <&gpio 36 GPIO_ACTIVE_HIGH>; >+ }; >+ }; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,13 +0,0 @@ >-DLC Display Co. DLC0700YZG-1 7.0" WSVGA TFT LCD panel >- >-Required properties: >-- compatible: should be "dlc,dlc0700yzg-1" >-- power-supply: See simple-panel.txt >- >-Optional properties: >-- reset-gpios: See panel-common.txt >-- enable-gpios: See simple-panel.txt >-- backlight: See simple-panel.txt >- >-This binding is compatible with the simple-panel binding, which is specified >-in simple-panel.txt in this directory. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml 2019-08-31 15:01:11.823736165 -0500 >@@ -0,0 +1,31 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/dlc,dlc0700yzg-1.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: DLC Display Co. DLC0700YZG-1 7.0" WSVGA TFT LCD panel >+ >+maintainers: >+ - Philipp Zabel <p.zabel@pengutronix.de> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+allOf: >+ - $ref: panel-common.yaml# >+ >+properties: >+ compatible: >+ const: dlc,dlc0700yzg-1 >+ >+ reset-gpios: true >+ enable-gpios: true >+ backlight: true >+ port: true >+ >+additionalProperties: false >+ >+required: >+ - compatible >+ - power-supply >+ >+... 
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/edt,et-series.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/edt,et-series.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/edt,et-series.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/edt,et-series.txt 2019-08-31 15:01:11.823736165 -0500 >@@ -40,7 +40,7 @@ > | Identifier | compatbile | description | > +=================+=====================+=====================================+ > | ETM0700G0DH6 | edt,etm070080dh6 | WVGA TFT Display with capacitive | >-| | | Touchscreen | >+| | edt,etm0700g0dh6 | Touchscreen | > +-----------------+---------------------+-------------------------------------+ > | ETM0700G0BDH6 | edt,etm070080bdh6 | Same as ETM0700G0DH6 but with | > | | | inverted pixel clock. | >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt 2019-08-31 15:01:11.823736165 -0500 >@@ -0,0 +1,12 @@ >+GiantPlus 3.0" (320x240 pixels) 24-bit TFT LCD panel >+ >+Required properties: >+- compatible: should be "giantplus,gpm940b0" >+- power-supply: as specified in the base binding >+ >+Optional properties: >+- backlight: as specified in the base binding >+- enable-gpios: as specified in the base binding >+ >+This binding is compatible with the simple-panel binding, which is specified >+in simple-panel.txt in this directory. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,7 +0,0 @@ >-Innolux Corporation 10.1" EE101IA-01D WXGA (1280x800) LVDS panel >- >-Required properties: >-- compatible: should be "innolux,ee101ia-01d" >- >-This binding is compatible with the lvds-panel binding, which is specified >-in panel-lvds.txt in this directory. 
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,31 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/innolux,ee101ia-01d.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: Innolux Corporation 10.1" EE101IA-01D WXGA (1280x800) LVDS panel >+ >+maintainers: >+ - Heiko Stuebner <heiko.stuebner@bq.com> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+allOf: >+ - $ref: lvds.yaml# >+ >+properties: >+ compatible: >+ items: >+ - const: innolux,ee101ia-01d >+ - {} # panel-lvds, but not listed here to avoid false select >+ >+ backlight: true >+ enable-gpios: true >+ power-supply: true >+ width-mm: true >+ height-mm: true >+ panel-timing: true >+ port: true >+ >+additionalProperties: false >+... >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,42 @@ >+King Display KD035G6-54NT 3.5" (320x240 pixels) 24-bit TFT LCD panel >+ >+Required properties: >+- compatible: should be "kingdisplay,kd035g6-54nt" >+- power-supply: See panel-common.txt >+- reset-gpios: See panel-common.txt >+ >+Optional properties: >+- backlight: see panel-common.txt >+ >+The generic bindings for the SPI slaves documented in [1] also apply. >+ >+The device node can contain one 'port' child node with one child >+'endpoint' node, according to the bindings defined in [2]. This >+node should describe panel's video bus. 
>+ >+[1]: Documentation/devicetree/bindings/spi/spi-bus.txt >+[2]: Documentation/devicetree/bindings/graph.txt >+ >+Example: >+ >+&spi { >+ panel@0 { >+ compatible = "kingdisplay,kd035g6-54nt"; >+ reg = <0>; >+ >+ spi-max-frequency = <3125000>; >+ spi-3wire; >+ spi-cs-high; >+ >+ reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>; >+ >+ backlight = <&backlight>; >+ power-supply = <&ldo6>; >+ >+ port { >+ panel_input: endpoint { >+ remote-endpoint = <&panel_output>; >+ }; >+ }; >+ }; >+}; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/lvds.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/lvds.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/lvds.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/lvds.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,107 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/lvds.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: LVDS Display Panel >+ >+maintainers: >+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+description: |+ >+ LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple >+ incompatible data link layers have been used over time to transmit image data >+ to LVDS panels. This bindings supports display panels compatible with the >+ following specifications. >+ >+ [JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February >+ 1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA) >+ [LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National >+ Semiconductor >+ [VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video >+ Electronics Standards Association (VESA) >+ >+ Device compatible with those specifications have been marketed under the >+ FPD-Link and FlatLink brands. >+ >+allOf: >+ - $ref: panel-common.yaml# >+ >+properties: >+ compatible: >+ contains: >+ const: panel-lvds >+ description: >+ Shall contain "panel-lvds" in addition to a mandatory panel-specific >+ compatible string defined in individual panel bindings. The "panel-lvds" >+ value shall never be used on its own. >+ >+ data-mapping: >+ enum: >+ - jeida-18 >+ - jeida-24 >+ - vesa-24 >+ description: | >+ The color signals mapping order. >+ >+ LVDS data mappings are defined as follows. >+ >+ - "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and >+ [VESA] specifications. Data are transferred as follows on 3 LVDS lanes. >+ >+ Slot 0 1 2 3 4 5 6 >+ ________________ _________________ >+ Clock \_______________________/ >+ ______ ______ ______ ______ ______ ______ ______ >+ DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< >+ DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< >+ DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< >+ >+ - "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI] >+ specifications. Data are transferred as follows on 4 LVDS lanes. 
>+ >+ Slot 0 1 2 3 4 5 6 >+ ________________ _________________ >+ Clock \_______________________/ >+ ______ ______ ______ ______ ______ ______ ______ >+ DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__>< >+ DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__>< >+ DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__>< >+ DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__>< >+ >+ - "vesa-24" - 24-bit data mapping compatible with the [VESA] specification. >+ Data are transferred as follows on 4 LVDS lanes. >+ >+ Slot 0 1 2 3 4 5 6 >+ ________________ _________________ >+ Clock \_______________________/ >+ ______ ______ ______ ______ ______ ______ ______ >+ DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< >+ DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< >+ DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< >+ DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__>< >+ >+ Control signals are mapped as follows. >+ >+ CTL0: HSync >+ CTL1: VSync >+ CTL2: Data Enable >+ CTL3: 0 >+ >+ data-mirror: >+ type: boolean >+ description: >+ If set, reverse the bit order described in the data mappings below on all >+ data lanes, transmitting bits for slots 6 to 0 instead of 0 to 6. >+ >+required: >+ - compatible >+ - data-mapping >+ - width-mm >+ - height-mm >+ - panel-timing >+ - port >+ >+... >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,47 +0,0 @@ >-Mitsubishi AA204XD12 LVDS Display Panel >-======================================= >- >-The AA104XD12 is a 10.4" XGA TFT-LCD display panel. >- >-These DT bindings follow the LVDS panel bindings defined in panel-lvds.txt >-with the following device-specific properties. >- >- >-Required properties: >- >-- compatible: Shall contain "mitsubishi,aa121td01" and "panel-lvds", in that >- order. >-- vcc-supply: Reference to the regulator powering the panel VCC pins. 
>- >- >-Example >-------- >- >-panel { >- compatible = "mitsubishi,aa104xd12", "panel-lvds"; >- vcc-supply = <&vcc_3v3>; >- >- width-mm = <210>; >- height-mm = <158>; >- >- data-mapping = "jeida-24"; >- >- panel-timing { >- /* 1024x768 @65Hz */ >- clock-frequency = <65000000>; >- hactive = <1024>; >- vactive = <768>; >- hsync-len = <136>; >- hfront-porch = <20>; >- hback-porch = <160>; >- vfront-porch = <3>; >- vback-porch = <29>; >- vsync-len = <6>; >- }; >- >- port { >- panel_in: endpoint { >- remote-endpoint = <&lvds_encoder>; >- }; >- }; >-}; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,75 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/mitsubishi,aa104xd12.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: Mitsubishi AA104XD12 10.4" XGA LVDS Display Panel >+ >+maintainers: >+ - Laurent Pinchart <laurent.pinchart@ideasonboard.com> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+allOf: >+ - $ref: lvds.yaml# >+ >+properties: >+ compatible: >+ items: >+ - const: mitsubishi,aa104xd12 >+ - {} # panel-lvds, but not listed here to avoid false select >+ >+ vcc-supply: >+ description: Reference to the regulator powering the panel VCC pins. >+ >+ data-mapping: >+ const: jeida-24 >+ >+ width-mm: >+ const: 210 >+ >+ height-mm: >+ const: 158 >+ >+ panel-timing: true >+ port: true >+ >+additionalProperties: false >+ >+required: >+ - compatible >+ - vcc-supply >+ >+examples: >+ - |+ >+ >+ panel { >+ compatible = "mitsubishi,aa104xd12", "panel-lvds"; >+ vcc-supply = <&vcc_3v3>; >+ >+ width-mm = <210>; >+ height-mm = <158>; >+ >+ data-mapping = "jeida-24"; >+ >+ panel-timing { >+ /* 1024x768 @65Hz */ >+ clock-frequency = <65000000>; >+ hactive = <1024>; >+ vactive = <768>; >+ hsync-len = <136>; >+ hfront-porch = <20>; >+ hback-porch = <160>; >+ vfront-porch = <3>; >+ vback-porch = <29>; >+ vsync-len = <6>; >+ }; >+ >+ port { >+ panel_in: endpoint { >+ remote-endpoint = <&lvds_encoder>; >+ }; >+ }; >+ }; >+ >+... >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,47 +0,0 @@ >-Mitsubishi AA121TD01 LVDS Display Panel >-======================================= >- >-The AA121TD01 is a 12.1" WXGA TFT-LCD display panel. >- >-These DT bindings follow the LVDS panel bindings defined in panel-lvds.txt >-with the following device-specific properties. >- >- >-Required properties: >- >-- compatible: Shall contain "mitsubishi,aa121td01" and "panel-lvds", in that >- order. >-- vcc-supply: Reference to the regulator powering the panel VCC pins. 
>- >- >-Example >-------- >- >-panel { >- compatible = "mitsubishi,aa121td01", "panel-lvds"; >- vcc-supply = <&vcc_3v3>; >- >- width-mm = <261>; >- height-mm = <163>; >- >- data-mapping = "jeida-24"; >- >- panel-timing { >- /* 1280x800 @60Hz */ >- clock-frequency = <71000000>; >- hactive = <1280>; >- vactive = <800>; >- hsync-len = <70>; >- hfront-porch = <20>; >- hback-porch = <70>; >- vsync-len = <5>; >- vfront-porch = <3>; >- vback-porch = <15>; >- }; >- >- port { >- panel_in: endpoint { >- remote-endpoint = <&lvds_encoder>; >- }; >- }; >-}; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,74 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/mitsubishi,aa121td01.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: Mitsubishi AA121TD01 12.1" WXGA LVDS Display Panel >+ >+maintainers: >+ - Laurent Pinchart <laurent.pinchart@ideasonboard.com> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+allOf: >+ - $ref: lvds.yaml# >+ >+properties: >+ compatible: >+ items: >+ - const: mitsubishi,aa121td01 >+ - {} # panel-lvds, but not listed here to avoid false select >+ >+ vcc-supply: >+ description: Reference to the regulator powering the panel VCC pins. >+ >+ data-mapping: >+ const: jeida-24 >+ >+ width-mm: >+ const: 261 >+ >+ height-mm: >+ const: 163 >+ >+ panel-timing: true >+ port: true >+ >+additionalProperties: false >+ >+required: >+ - compatible >+ - vcc-supply >+ >+examples: >+ - |+ >+ panel { >+ compatible = "mitsubishi,aa121td01", "panel-lvds"; >+ vcc-supply = <&vcc_3v3>; >+ >+ width-mm = <261>; >+ height-mm = <163>; >+ >+ data-mapping = "jeida-24"; >+ >+ panel-timing { >+ /* 1280x800 @60Hz */ >+ clock-frequency = <71000000>; >+ hactive = <1280>; >+ vactive = <800>; >+ hsync-len = <70>; >+ hfront-porch = <20>; >+ hback-porch = <70>; >+ vsync-len = <5>; >+ vfront-porch = <3>; >+ vback-porch = <15>; >+ }; >+ >+ port { >+ panel_in: endpoint { >+ remote-endpoint = <&lvds_encoder>; >+ }; >+ }; >+ }; >+ >+... >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,12 @@ >+OrtusTech COM37H3M05DTC Blanview 3.7" VGA portrait TFT-LCD panel >+ >+Required properties: >+- compatible: should be "ortustech,com37h3m05dtc" >+ >+Optional properties: >+- enable-gpios: GPIO pin to enable or disable the panel >+- backlight: phandle of the backlight device attached to the panel >+- power-supply: phandle of the regulator that provides the supply voltage >+ >+This binding is compatible with the simple-panel binding, which is specified >+in simple-panel.txt in this directory. 
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,12 @@ >+OrtusTech COM37H3M99DTC Blanview 3.7" VGA portrait TFT-LCD panel >+ >+Required properties: >+- compatible: should be "ortustech,com37h3m99dtc" >+ >+Optional properties: >+- enable-gpios: GPIO pin to enable or disable the panel >+- backlight: phandle of the backlight device attached to the panel >+- power-supply: phandle of the regulator that provides the supply voltage >+ >+This binding is compatible with the simple-panel binding, which is specified >+in simple-panel.txt in this directory. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-common.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-common.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-common.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-common.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,101 +0,0 @@ >-Common Properties for Display Panel >-=================================== >- >-This document defines device tree properties common to several classes of >-display panels. It doesn't constitue a device tree binding specification by >-itself but is meant to be referenced by device tree bindings. >- >-When referenced from panel device tree bindings the properties defined in this >-document are defined as follows. The panel device tree bindings are >-responsible for defining whether each property is required or optional. >- >- >-Descriptive Properties >----------------------- >- >-- width-mm, >-- height-mm: The width-mm and height-mm specify the width and height of the >- physical area where images are displayed. These properties are expressed in >- millimeters and rounded to the closest unit. >- >-- label: The label property specifies a symbolic name for the panel as a >- string suitable for use by humans. It typically contains a name inscribed on >- the system (e.g. as an affixed label) or specified in the system's >- documentation (e.g. in the user's manual). >- >- If no such name exists, and unless the property is mandatory according to >- device tree bindings, it shall rather be omitted than constructed of >- non-descriptive information. For instance an LCD panel in a system that >- contains a single panel shall not be labelled "LCD" if that name is not >- inscribed on the system or used in a descriptive fashion in system >- documentation. >- >- >-Display Timings >---------------- >- >-- panel-timing: Most display panels are restricted to a single resolution and >- require specific display timings. The panel-timing subnode expresses those >- timings as specified in the timing subnode section of the display timing >- bindings defined in >- Documentation/devicetree/bindings/display/panel/display-timing.txt. >- >- >-Connectivity >------------- >- >-- ports: Panels receive video data through one or multiple connections. 
While >- the nature of those connections is specific to the panel type, the >- connectivity is expressed in a standard fashion using ports as specified in >- the device graph bindings defined in >- Documentation/devicetree/bindings/graph.txt. >- >-- ddc-i2c-bus: Some panels expose EDID information through an I2C-compatible >- bus such as DDC2 or E-DDC. For such panels the ddc-i2c-bus contains a >- phandle to the system I2C controller connected to that bus. >- >- >-Control I/Os >------------- >- >-Many display panels can be controlled through pins driven by GPIOs. The nature >-and timing of those control signals are device-specific and left for panel >-device tree bindings to specify. The following GPIO specifiers can however be >-used for panels that implement compatible control signals. >- >-- enable-gpios: Specifier for a GPIO connected to the panel enable control >- signal. The enable signal is active high and enables operation of the panel. >- This property can also be used for panels implementing an active low power >- down signal, which is a negated version of the enable signal. Active low >- enable signals (or active high power down signals) can be supported by >- inverting the GPIO specifier polarity flag. >- >- Note that the enable signal control panel operation only and must not be >- confused with a backlight enable signal. >- >-- reset-gpios: Specifier for a GPIO coonnected to the panel reset control >- signal. The reset signal is active low and resets the panel internal logic >- while active. Active high reset signals can be supported by inverting the >- GPIO specifier polarity flag. >- >-Power >------ >- >-- power-supply: display panels require power to be supplied. While several >- panels need more than one power supply with panel-specific constraints >- governing the order and timings of the power supplies, in many cases a single >- power supply is sufficient, either because the panel has a single power rail, >- or because all its power rails can be driven by the same supply. In that case >- the power-supply property specifies the supply powering the panel as a phandle >- to a regulator. >- >-Backlight >---------- >- >-Most display panels include a backlight. Some of them also include a backlight >-controller exposed through a control bus such as I2C or DSI. Others expose >-backlight control through GPIO, PWM or other signals connected to an external >-backlight controller. >- >-- backlight: For panels whose backlight is controlled by an external backlight >- controller, this property contains a phandle that references the controller. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-common.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-common.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-common.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-common.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,149 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/panel-common.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: Common Properties for Display Panels >+ >+maintainers: >+ - Thierry Reding <thierry.reding@gmail.com> >+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com> >+ >+description: | >+ This document defines device tree properties common to several classes of >+ display panels. 
It doesn't constitue a device tree binding specification by >+ itself but is meant to be referenced by device tree bindings. >+ >+ When referenced from panel device tree bindings the properties defined in this >+ document are defined as follows. The panel device tree bindings are >+ responsible for defining whether each property is required or optional. >+ >+properties: >+ # Descriptive Properties >+ width-mm: >+ description: >+ Specifies the width of the physical area where images are displayed. This >+ property is expressed in millimeters and rounded to the closest unit. >+ >+ height-mm: >+ description: >+ Specifies the height of the physical area where images are displayed. This >+ property is expressed in millimeters and rounded to the closest unit. >+ >+ label: >+ description: | >+ The label property specifies a symbolic name for the panel as a >+ string suitable for use by humans. It typically contains a name inscribed >+ on the system (e.g. as an affixed label) or specified in the system's >+ documentation (e.g. in the user's manual). >+ >+ If no such name exists, and unless the property is mandatory according to >+ device tree bindings, it shall rather be omitted than constructed of >+ non-descriptive information. For instance an LCD panel in a system that >+ contains a single panel shall not be labelled "LCD" if that name is not >+ inscribed on the system or used in a descriptive fashion in system >+ documentation. >+ >+ rotation: >+ description: >+ Display rotation in degrees counter clockwise (0,90,180,270) >+ allOf: >+ - $ref: /schemas/types.yaml#/definitions/uint32 >+ - enum: [ 0, 90, 180, 270 ] >+ >+ # Display Timings >+ panel-timing: >+ type: object >+ description: >+ Most display panels are restricted to a single resolution and >+ require specific display timings. The panel-timing subnode expresses those >+ timings as specified in the timing subnode section of the display timing >+ bindings defined in >+ Documentation/devicetree/bindings/display/panel/display-timing.txt. >+ >+ # Connectivity >+ port: >+ type: object >+ >+ ports: >+ type: object >+ description: >+ Panels receive video data through one or multiple connections. While >+ the nature of those connections is specific to the panel type, the >+ connectivity is expressed in a standard fashion using ports as specified >+ in the device graph bindings defined in >+ Documentation/devicetree/bindings/graph.txt. >+ >+ ddc-i2c-bus: >+ $ref: /schemas/types.yaml#/definitions/phandle >+ description: >+ Some panels expose EDID information through an I2C-compatible >+ bus such as DDC2 or E-DDC. For such panels the ddc-i2c-bus contains a >+ phandle to the system I2C controller connected to that bus. >+ >+ no-hpd: >+ type: boolean >+ description: >+ This panel is supposed to communicate that it's ready via HPD >+ (hot plug detect) signal, but the signal isn't hooked up so we should >+ hardcode the max delay from the panel spec when powering up the panel. >+ >+ # Control I/Os >+ >+ # Many display panels can be controlled through pins driven by GPIOs. The nature >+ # and timing of those control signals are device-specific and left for panel >+ # device tree bindings to specify. The following GPIO specifiers can however be >+ # used for panels that implement compatible control signals. >+ >+ enable-gpios: >+ maxItems: 1 >+ description: | >+ Specifier for a GPIO connected to the panel enable control signal. The >+ enable signal is active high and enables operation of the panel. 
This
>+ property can also be used for panels implementing an active low power down
>+ signal, which is a negated version of the enable signal. Active low enable
>+ signals (or active high power down signals) can be supported by inverting
>+ the GPIO specifier polarity flag.
>+
>+ Note that the enable signal controls panel operation only and must not be
>+ confused with a backlight enable signal.
>+
>+ reset-gpios:
>+ maxItems: 1
>+ description:
>+ Specifier for a GPIO connected to the panel reset control signal.
>+ The reset signal is active low and resets the panel internal logic
>+ while active. Active high reset signals can be supported by inverting the
>+ GPIO specifier polarity flag.
>+
>+ # Power
>+ power-supply:
>+ description:
>+ Display panels require power to be supplied. While several panels need
>+ more than one power supply with panel-specific constraints governing the
>+ order and timings of the power supplies, in many cases a single power
>+ supply is sufficient, either because the panel has a single power rail, or
>+ because all its power rails can be driven by the same supply. In that case
>+ the power-supply property specifies the supply powering the panel as a
>+ phandle to a regulator.
>+
>+ # Backlight
>+
>+ # Most display panels include a backlight. Some of them also include a backlight
>+ # controller exposed through a control bus such as I2C or DSI. Others expose
>+ # backlight control through GPIO, PWM or other signals connected to an external
>+ # backlight controller.
>+
>+ backlight:
>+ $ref: /schemas/types.yaml#/definitions/phandle
>+ description:
>+ For panels whose backlight is controlled by an external backlight
>+ controller, this property contains a phandle that references the
>+ controller.
>+
>+dependencies:
>+ width-mm: [ height-mm ]
>+ height-mm: [ width-mm ]
>+
>+...
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-lvds.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-lvds.txt
>--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-lvds.txt 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-lvds.txt 1969-12-31 18:00:00.000000000 -0600
>@@ -1,121 +0,0 @@
>-LVDS Display Panel
>-==================
>-
>-LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
>-incompatible data link layers have been used over time to transmit image data
>-to LVDS panels. This bindings supports display panels compatible with the
>-following specifications.
>-
>-[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
>-1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
>-[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
>-Semiconductor
>-[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
>-Electronics Standards Association (VESA)
>-
>-Device compatible with those specifications have been marketed under the
>-FPD-Link and FlatLink brands.
>-
>-
>-Required properties:
>-
>-- compatible: Shall contain "panel-lvds" in addition to a mandatory
>- panel-specific compatible string defined in individual panel bindings. The
>- "panel-lvds" value shall never be used on its own.
>-- width-mm: See panel-common.txt.
>-- height-mm: See panel-common.txt.
>-- data-mapping: The color signals mapping order, "jeida-18", "jeida-24"
>- or "vesa-24".
>-
>-Optional properties:
>-
>-- label: See panel-common.txt.
>-- gpios: See panel-common.txt. 
>-- backlight: See panel-common.txt. >-- power-supply: See panel-common.txt. >-- data-mirror: If set, reverse the bit order described in the data mappings >- below on all data lanes, transmitting bits for slots 6 to 0 instead of >- 0 to 6. >- >-Required nodes: >- >-- panel-timing: See panel-common.txt. >-- ports: See panel-common.txt. These bindings require a single port subnode >- corresponding to the panel LVDS input. >- >- >-LVDS data mappings are defined as follows. >- >-- "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and >- [VESA] specifications. Data are transferred as follows on 3 LVDS lanes. >- >-Slot 0 1 2 3 4 5 6 >- ________________ _________________ >-Clock \_______________________/ >- ______ ______ ______ ______ ______ ______ ______ >-DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< >-DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< >-DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< >- >-- "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI] >- specifications. Data are transferred as follows on 4 LVDS lanes. >- >-Slot 0 1 2 3 4 5 6 >- ________________ _________________ >-Clock \_______________________/ >- ______ ______ ______ ______ ______ ______ ______ >-DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__>< >-DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__>< >-DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__>< >-DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__>< >- >-- "vesa-24" - 24-bit data mapping compatible with the [VESA] specification. >- Data are transferred as follows on 4 LVDS lanes. >- >-Slot 0 1 2 3 4 5 6 >- ________________ _________________ >-Clock \_______________________/ >- ______ ______ ______ ______ ______ ______ ______ >-DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< >-DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< >-DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< >-DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__>< >- >-Control signals are mapped as follows. 
>- >-CTL0: HSync >-CTL1: VSync >-CTL2: Data Enable >-CTL3: 0 >- >- >-Example >-------- >- >-panel { >- compatible = "mitsubishi,aa121td01", "panel-lvds"; >- >- width-mm = <261>; >- height-mm = <163>; >- >- data-mapping = "jeida-24"; >- >- panel-timing { >- /* 1280x800 @60Hz */ >- clock-frequency = <71000000>; >- hactive = <1280>; >- vactive = <800>; >- hsync-len = <70>; >- hfront-porch = <20>; >- hback-porch = <70>; >- vsync-len = <5>; >- vfront-porch = <3>; >- vback-porch = <15>; >- }; >- >- port { >- panel_in: endpoint { >- remote-endpoint = <&lvds_encoder>; >- }; >- }; >-}; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,4 +0,0 @@ >-Common display properties >-------------------------- >- >-- rotation: Display rotation in degrees counter clockwise (0,90,180,270) >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,14 +0,0 @@ >-PDA 91-00156-A0 5.0" WVGA TFT LCD panel >- >-Required properties: >-- compatible: should be "pda,91-00156-a0" >-- power-supply: this panel requires a single power supply. A phandle to a >-regulator needs to be specified here. Compatible with panel-common binding which >-is specified in the panel-common.txt in this directory. >-- backlight: this panel's backlight is controlled by an external backlight >-controller. A phandle to this controller needs to be specified here. >-Compatible with panel-common binding which is specified in the panel-common.txt >-in this directory. >- >-This binding is compatible with the simple-panel binding, which is specified >-in simple-panel.txt in this directory. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,31 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/pda,91-00156-a0.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: PDA 91-00156-A0 5.0" WVGA TFT LCD panel >+ >+maintainers: >+ - Cristian Birsan <cristian.birsan@microchip.com> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+allOf: >+ - $ref: panel-common.yaml# >+ >+properties: >+ compatible: >+ const: pda,91-00156-a0 >+ >+ power-supply: true >+ backlight: true >+ port: true >+ >+additionalProperties: false >+ >+required: >+ - compatible >+ - power-supply >+ - backlight >+ >+... 
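>
>The common panel properties used above (power-supply, enable-gpios,
>backlight) all resolve to standard resource lookups on the driver side.
>A minimal probe sketch of those lookups, assuming the usual devm
>regulator, GPIO and backlight helpers; the driver and struct names are
>illustrative only, not code carried by this patch:
>
>        #include <linux/backlight.h>
>        #include <linux/gpio/consumer.h>
>        #include <linux/platform_device.h>
>        #include <linux/regulator/consumer.h>
>        #include <linux/slab.h>
>
>        /* Illustrative panel state for the sketch. */
>        struct demo_panel {
>                struct regulator *supply;            /* power-supply */
>                struct gpio_desc *enable_gpio;       /* enable-gpios */
>                struct backlight_device *backlight;  /* backlight */
>        };
>
>        static int demo_panel_probe(struct platform_device *pdev)
>        {
>                struct demo_panel *panel;
>
>                panel = devm_kzalloc(&pdev->dev, sizeof(*panel), GFP_KERNEL);
>                if (!panel)
>                        return -ENOMEM;
>
>                /* "power-supply" resolves through the "power" consumer name. */
>                panel->supply = devm_regulator_get(&pdev->dev, "power");
>                if (IS_ERR(panel->supply))
>                        return PTR_ERR(panel->supply);
>
>                /* The polarity flag of the DT specifier is honoured here. */
>                panel->enable_gpio = devm_gpiod_get_optional(&pdev->dev, "enable",
>                                                             GPIOD_OUT_LOW);
>                if (IS_ERR(panel->enable_gpio))
>                        return PTR_ERR(panel->enable_gpio);
>
>                /* Resolves the "backlight" phandle, if one is present. */
>                panel->backlight = devm_of_find_backlight(&pdev->dev);
>                if (IS_ERR(panel->backlight))
>                        return PTR_ERR(panel->backlight);
>
>                platform_set_drvdata(pdev, panel);
>                return 0;
>        }
>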
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,49 +0,0 @@ >-This binding covers the official 7" (800x480) Raspberry Pi touchscreen >-panel. >- >-This DSI panel contains: >- >-- TC358762 DSI->DPI bridge >-- Atmel microcontroller on I2C for power sequencing the DSI bridge and >- controlling backlight >-- Touchscreen controller on I2C for touch input >- >-and this binding covers the DSI display parts but not its touch input. >- >-Required properties: >-- compatible: Must be "raspberrypi,7inch-touchscreen-panel" >-- reg: Must be "45" >-- port: See panel-common.txt >- >-Example: >- >-dsi1: dsi@7e700000 { >- #address-cells = <1>; >- #size-cells = <0>; >- <...> >- >- port { >- dsi_out_port: endpoint { >- remote-endpoint = <&panel_dsi_port>; >- }; >- }; >-}; >- >-i2c_dsi: i2c { >- compatible = "i2c-gpio"; >- #address-cells = <1>; >- #size-cells = <0>; >- gpios = <&gpio 28 0 >- &gpio 29 0>; >- >- lcd@45 { >- compatible = "raspberrypi,7inch-touchscreen-panel"; >- reg = <0x45>; >- >- port { >- panel_dsi_port: endpoint { >- remote-endpoint = <&dsi_out_port>; >- }; >- }; >- }; >-}; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,71 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/raspberrypi,7inch-touchscreen.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: The official 7" (800x480) Raspberry Pi touchscreen >+ >+maintainers: >+ - Eric Anholt <eric@anholt.net> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+description: |+ >+ This DSI panel contains: >+ >+ - TC358762 DSI->DPI bridge >+ - Atmel microcontroller on I2C for power sequencing the DSI bridge and >+ controlling backlight >+ - Touchscreen controller on I2C for touch input >+ >+ and this binding covers the DSI display parts but not its touch input. >+ >+properties: >+ compatible: >+ const: raspberrypi,7inch-touchscreen-panel >+ >+ reg: >+ const: 0x45 >+ >+ port: true >+ >+required: >+ - compatible >+ - reg >+ - port >+ >+additionalProperties: false >+ >+examples: >+ - |+ >+ dsi1: dsi { >+ #address-cells = <1>; >+ #size-cells = <0>; >+ >+ port { >+ dsi_out_port: endpoint { >+ remote-endpoint = <&panel_dsi_port>; >+ }; >+ }; >+ }; >+ >+ i2c_dsi: i2c { >+ compatible = "i2c-gpio"; >+ #address-cells = <1>; >+ #size-cells = <0>; >+ scl-gpios = <&gpio 28 0>; >+ sda-gpios = <&gpio 29 0>; >+ >+ lcd@45 { >+ compatible = "raspberrypi,7inch-touchscreen-panel"; >+ reg = <0x45>; >+ >+ port { >+ panel_dsi_port: endpoint { >+ remote-endpoint = <&dsi_out_port>; >+ }; >+ }; >+ }; >+ }; >+ >+... 
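>
>In the touchscreen example above, the compatible string and reg = <0x45>
>are what the I2C core matches on and hands to the panel driver at probe
>time. A bare-bones matching sketch (names made up; the real driver lives
>in drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c):
>
>        #include <linux/i2c.h>
>        #include <linux/module.h>
>        #include <linux/of.h>
>
>        static const struct of_device_id rpi_ts_demo_of_match[] = {
>                { .compatible = "raspberrypi,7inch-touchscreen-panel" },
>                { /* sentinel */ }
>        };
>        MODULE_DEVICE_TABLE(of, rpi_ts_demo_of_match);
>
>        static int rpi_ts_demo_probe(struct i2c_client *client,
>                                     const struct i2c_device_id *id)
>        {
>                /* reg = <0x45> from the device tree ends up in client->addr. */
>                dev_info(&client->dev, "bound at address 0x%02x\n", client->addr);
>                return 0;
>        }
>
>        static struct i2c_driver rpi_ts_demo_driver = {
>                .driver = {
>                        .name = "rpi-ts-demo",
>                        .of_match_table = rpi_ts_demo_of_match,
>                },
>                .probe = rpi_ts_demo_probe,
>        };
>        module_i2c_driver(rpi_ts_demo_driver);
>
>        MODULE_LICENSE("GPL");
>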
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt
>--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt 1969-12-31 18:00:00.000000000 -0600
>+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt 2019-08-31 15:01:11.824736165 -0500
>@@ -0,0 +1,41 @@
>+Raydium RM67191 OLED LCD panel with MIPI-DSI protocol
>+
>+Required properties:
>+- compatible: "raydium,rm67191"
>+- reg: virtual channel for MIPI-DSI protocol
>+ must be <0>
>+- dsi-lanes: number of DSI lanes to be used
>+ must be <3> or <4>
>+- port: input port node with endpoint definition as
>+ defined in Documentation/devicetree/bindings/graph.txt;
>+ the input port should be connected to a MIPI-DSI device
>+ driver
>+
>+Optional properties:
>+- reset-gpios: a GPIO spec for the RST_B GPIO pin
>+- v3p3-supply: phandle to 3.3V regulator that powers the VDD_3V3 pin
>+- v1p8-supply: phandle to 1.8V regulator that powers the VDD_1V8 pin
>+- width-mm: see panel-common.txt
>+- height-mm: see panel-common.txt
>+- video-mode: 0 - burst-mode
>+ 1 - non-burst with sync event
>+ 2 - non-burst with sync pulse
>+
>+Example:
>+
>+ panel@0 {
>+ compatible = "raydium,rm67191";
>+ reg = <0>;
>+ pinctrl-0 = <&pinctrl_mipi_dsi_0_1_en>;
>+ pinctrl-names = "default";
>+ reset-gpios = <&gpio1 7 GPIO_ACTIVE_LOW>;
>+ dsi-lanes = <4>;
>+ width-mm = <68>;
>+ height-mm = <121>;
>+
>+ port {
>+ panel_in: endpoint {
>+ remote-endpoint = <&mipi_out>;
>+ };
>+ };
>+ };
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt
>--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt 2019-08-31 15:01:11.824736165 -0500
>@@ -5,6 +5,9 @@
> - reg: DSI virtual channel of the peripheral
> - reset-gpios: panel reset gpio
> - backlight: phandle of the backlight device attached to the panel
>+- vcc-supply: phandle of the regulator that provides the vcc supply voltage.
>+- iovcc-supply: phandle of the regulator that provides the iovcc supply
>+ voltage.
> 
> Example:
> 
>@@ -14,5 +17,7 @@
> reg = <0>;
> backlight = <&backlight>;
> reset-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>;
>+ vcc-supply = <&reg_2v8_p>;
>+ iovcc-supply = <&reg_1v8_p>;
> };
> };
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt
>--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt 1969-12-31 18:00:00.000000000 -0600
>@@ -1,41 +0,0 @@
>-Solomon Goldentek Display GKTW70SDAE4SE LVDS Display Panel
>-==========================================================
>-
>-The GKTW70SDAE4SE is a 7" WVGA TFT-LCD display panel.
>-
>-These DT bindings follow the LVDS panel bindings defined in panel-lvds.txt
>-with the following device-specific properties.
>-
>-Required properties:
>-
>-- compatible: Shall contain "sgd,gktw70sdae4se" and "panel-lvds", in that order. 
>- >-Example >-------- >- >-panel { >- compatible = "sgd,gktw70sdae4se", "panel-lvds"; >- >- width-mm = <153>; >- height-mm = <86>; >- >- data-mapping = "jeida-18"; >- >- panel-timing { >- clock-frequency = <32000000>; >- hactive = <800>; >- vactive = <480>; >- hback-porch = <39>; >- hfront-porch = <39>; >- vback-porch = <29>; >- vfront-porch = <13>; >- hsync-len = <47>; >- vsync-len = <2>; >- }; >- >- port { >- panel_in: endpoint { >- remote-endpoint = <&lvds_encoder>; >- }; >- }; >-}; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,68 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/sgd,gktw70sdae4se.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: Solomon Goldentek Display GKTW70SDAE4SE 7" WVGA LVDS Display Panel >+ >+maintainers: >+ - Neil Armstrong <narmstrong@baylibre.com> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+allOf: >+ - $ref: lvds.yaml# >+ >+properties: >+ compatible: >+ items: >+ - const: sgd,gktw70sdae4se >+ - {} # panel-lvds, but not listed here to avoid false select >+ >+ data-mapping: >+ const: jeida-18 >+ >+ width-mm: >+ const: 153 >+ >+ height-mm: >+ const: 86 >+ >+ panel-timing: true >+ port: true >+ >+additionalProperties: false >+ >+required: >+ - compatible >+ >+examples: >+ - |+ >+ panel { >+ compatible = "sgd,gktw70sdae4se", "panel-lvds"; >+ >+ width-mm = <153>; >+ height-mm = <86>; >+ >+ data-mapping = "jeida-18"; >+ >+ panel-timing { >+ clock-frequency = <32000000>; >+ hactive = <800>; >+ vactive = <480>; >+ hback-porch = <39>; >+ hfront-porch = <39>; >+ vback-porch = <29>; >+ vfront-porch = <13>; >+ hsync-len = <47>; >+ vsync-len = <2>; >+ }; >+ >+ port { >+ panel_in: endpoint { >+ remote-endpoint = <&lvds_encoder>; >+ }; >+ }; >+ }; >+ >+... >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,26 @@ >+Sharp LD-D5116Z01B 12.3" WUXGA+ eDP panel >+ >+Required properties: >+- compatible: should be "sharp,ld-d5116z01b" >+- power-supply: regulator to provide the VCC supply voltage (3.3 volts) >+ >+This binding is compatible with the simple-panel binding. >+ >+The device node can contain one 'port' child node with one child >+'endpoint' node, according to the bindings defined in [1]. This >+node should describe panel's video bus. 
>+ >+[1]: Documentation/devicetree/bindings/media/video-interfaces.txt >+ >+Example: >+ >+ panel: panel { >+ compatible = "sharp,ld-d5116z01b"; >+ power-supply = <&vlcd_3v3>; >+ >+ port { >+ panel_ep: endpoint { >+ remote-endpoint = <&bridge_out_ep>; >+ }; >+ }; >+ }; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,12 @@ >+Sharp LQ070Y3DG3B 7.0" WVGA landscape TFT LCD panel >+ >+Required properties: >+- compatible: should be "sharp,lq070y3dg3b" >+ >+Optional properties: >+- enable-gpios: GPIO pin to enable or disable the panel >+- backlight: phandle of the backlight device attached to the panel >+- power-supply: phandle of the regulator that provides the supply voltage >+ >+This binding is compatible with the simple-panel binding, which is specified >+in simple-panel.txt in this directory. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,12 @@ >+Sharp 2.0" (240x160 pixels) 16-bit TFT LCD panel >+ >+Required properties: >+- compatible: should be "sharp,ls020b1dd01d" >+- power-supply: as specified in the base binding >+ >+Optional properties: >+- backlight: as specified in the base binding >+- enable-gpios: as specified in the base binding >+ >+This binding is compatible with the simple-panel binding, which is specified >+in simple-panel.txt in this directory. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/simple-panel.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/simple-panel.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/simple-panel.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/simple-panel.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -1,28 +1 @@ >-Simple display panel >-==================== >- >-panel node >----------- >- >-Required properties: >-- power-supply: See panel-common.txt >- >-Optional properties: >-- ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing >-- enable-gpios: GPIO pin to enable or disable the panel >-- backlight: phandle of the backlight device attached to the panel >-- no-hpd: This panel is supposed to communicate that it's ready via HPD >- (hot plug detect) signal, but the signal isn't hooked up so we should >- hardcode the max delay from the panel spec when powering up the panel. >- >-Example: >- >- panel: panel { >- compatible = "cptt,claa101wb01"; >- ddc-i2c-bus = <&panelddc>; >- >- power-supply = <&vdd_pnl_reg>; >- enable-gpios = <&gpio 90 0>; >- >- backlight = <&backlight>; >- }; >+See panel-common.yaml in this directory. 
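>
>Several of the panels above defer to the simple-panel binding; on the
>driver side the usual pattern is one of_device_id entry per compatible,
>each pointing at a per-panel description. A sketch of that pattern (the
>demo struct and values are placeholders, not the driver's actual
>struct panel_desc):
>
>        #include <linux/of_device.h>
>        #include <linux/platform_device.h>
>
>        /* Placeholder per-panel description; the real driver also keeps
>         * timings and power sequencing delays per panel. */
>        struct demo_panel_desc {
>                unsigned int width_mm;  /* placeholder values below, */
>                unsigned int height_mm; /* not datasheet numbers */
>        };
>
>        static const struct demo_panel_desc demo_desc = {
>                .width_mm = 150,
>                .height_mm = 90,
>        };
>
>        static const struct of_device_id demo_panel_of_match[] = {
>                /* One entry per supported panel, each with its own desc. */
>                { .compatible = "sharp,lq070y3dg3b", .data = &demo_desc },
>                { /* sentinel */ }
>        };
>
>        static int demo_panel_probe(struct platform_device *pdev)
>        {
>                const struct demo_panel_desc *desc;
>
>                /* Returns the .data of the entry whose compatible matched. */
>                desc = of_device_get_match_data(&pdev->dev);
>                if (!desc)
>                        return -ENODEV;
>
>                dev_info(&pdev->dev, "panel area %ux%u mm\n",
>                         desc->width_mm, desc->height_mm);
>                return 0;
>        }
>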
>diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,15 +0,0 @@ >-TFC S9700RTWV43TR-01B 7" Three Five Corp 800x480 LCD panel with >-resistive touch >- >-The panel is found on TI AM335x-evm. >- >-Required properties: >-- compatible: should be "tfc,s9700rtwv43tr-01b" >-- power-supply: See panel-common.txt >- >-Optional properties: >-- enable-gpios: GPIO pin to enable or disable the panel, if there is one >-- backlight: phandle of the backlight device attached to the panel >- >-This binding is compatible with the simple-panel binding, which is specified >-in simple-panel.txt in this directory. >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,33 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/tfc,s9700rtwv43tr-01b.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: TFC S9700RTWV43TR-01B 7" Three Five Corp 800x480 LCD panel with resistive touch >+ >+maintainers: >+ - Jyri Sarha <jsarha@ti.com> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+description: |+ >+ The panel is found on TI AM335x-evm. >+ >+allOf: >+ - $ref: panel-common.yaml# >+ >+properties: >+ compatible: >+ const: tfc,s9700rtwv43tr-01b >+ >+ enable-gpios: true >+ backlight: true >+ port: true >+ >+additionalProperties: false >+ >+required: >+ - compatible >+ - power-supply >+ >+... >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt 1969-12-31 18:00:00.000000000 -0600 >@@ -1,70 +0,0 @@ >-TPO TPG110 Panel >-================ >- >-This panel driver is a component that acts as an intermediary >-between an RGB output and a variety of panels. The panel >-driver is strapped up in electronics to the desired resolution >-and other properties, and has a control interface over 3WIRE >-SPI. By talking to the TPG110 over SPI, the strapped properties >-can be discovered and the hardware is therefore mostly >-self-describing. >- >- +--------+ >-SPI -> | TPO | -> physical display >-RGB -> | TPG110 | >- +--------+ >- >-If some electrical strap or alternate resolution is desired, >-this can be set up by taking software control of the display >-over the SPI interface. The interface can also adjust >-for properties of the display such as gamma correction and >-certain electrical driving levels. 
>- >-The TPG110 does not know the physical dimensions of the panel >-connected, so this needs to be specified in the device tree. >- >-It requires a GPIO line for control of its reset line. >- >-The serial protocol has line names that resemble I2C but the >-protocol is not I2C but 3WIRE SPI. >- >-Required properties: >-- compatible : one of: >- "ste,nomadik-nhk15-display", "tpo,tpg110" >- "tpo,tpg110" >-- grestb-gpios : panel reset GPIO >-- width-mm : see display/panel/panel-common.txt >-- height-mm : see display/panel/panel-common.txt >- >-The device needs to be a child of an SPI bus, see >-spi/spi-bus.txt. The SPI child must set the following >-properties: >-- spi-3wire >-- spi-max-frequency = <3000000>; >-as these are characteristics of this device. >- >-The device node can contain one 'port' child node with one child >-'endpoint' node, according to the bindings defined in >-media/video-interfaces.txt. This node should describe panel's video bus. >- >-Example >-------- >- >-panel: display@0 { >- compatible = "tpo,tpg110"; >- reg = <0>; >- spi-3wire; >- /* 320 ns min period ~= 3 MHz */ >- spi-max-frequency = <3000000>; >- /* Width and height from data sheet */ >- width-mm = <116>; >- height-mm = <87>; >- grestb-gpios = <&foo_gpio 5 GPIO_ACTIVE_LOW>; >- backlight = <&bl>; >- >- port { >- nomadik_clcd_panel: endpoint { >- remote-endpoint = <&foo>; >- }; >- }; >-}; >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml 2019-08-31 15:01:11.824736165 -0500 >@@ -0,0 +1,101 @@ >+# SPDX-License-Identifier: GPL-2.0 >+%YAML 1.2 >+--- >+$id: http://devicetree.org/schemas/display/panel/tpo,tpg110.yaml# >+$schema: http://devicetree.org/meta-schemas/core.yaml# >+ >+title: TPO TPG110 Panel >+ >+maintainers: >+ - Linus Walleij <linus.walleij@linaro.org> >+ - Thierry Reding <thierry.reding@gmail.com> >+ >+description: |+ >+ This panel driver is a component that acts as an intermediary >+ between an RGB output and a variety of panels. The panel >+ driver is strapped up in electronics to the desired resolution >+ and other properties, and has a control interface over 3WIRE >+ SPI. By talking to the TPG110 over SPI, the strapped properties >+ can be discovered and the hardware is therefore mostly >+ self-describing. >+ >+ +--------+ >+ SPI -> | TPO | -> physical display >+ RGB -> | TPG110 | >+ +--------+ >+ >+ If some electrical strap or alternate resolution is desired, >+ this can be set up by taking software control of the display >+ over the SPI interface. The interface can also adjust >+ for properties of the display such as gamma correction and >+ certain electrical driving levels. >+ >+ The TPG110 does not know the physical dimensions of the panel >+ connected, so this needs to be specified in the device tree. >+ >+ It requires a GPIO line for control of its reset line. >+ >+ The serial protocol has line names that resemble I2C but the >+ protocol is not I2C but 3WIRE SPI. 
>+ >+ >+allOf: >+ - $ref: panel-common.yaml# >+ >+properties: >+ compatible: >+ oneOf: >+ - items: >+ - enum: >+ - ste,nomadik-nhk15-display >+ - const: tpo,tpg110 >+ - const: tpo,tpg110 >+ >+ reg: true >+ >+ grestb-gpios: >+ maxItems: 1 >+ description: panel reset GPIO >+ >+ spi-3wire: true >+ >+ spi-max-frequency: >+ const: 3000000 >+ >+required: >+ - compatible >+ - reg >+ - grestb-gpios >+ - width-mm >+ - height-mm >+ - spi-3wire >+ - spi-max-frequency >+ - port >+ >+examples: >+ - |+ >+ spi { >+ #address-cells = <1>; >+ #size-cells = <0>; >+ >+ panel: display@0 { >+ compatible = "tpo,tpg110"; >+ reg = <0>; >+ spi-3wire; >+ /* 320 ns min period ~= 3 MHz */ >+ spi-max-frequency = <3000000>; >+ /* Width and height from data sheet */ >+ width-mm = <116>; >+ height-mm = <87>; >+ grestb-gpios = <&foo_gpio 5 1>; >+ backlight = <&bl>; >+ >+ port { >+ nomadik_clcd_panel: endpoint { >+ remote-endpoint = <&foo>; >+ }; >+ }; >+ }; >+ }; >+ >+... >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -32,17 +32,6 @@ > - video port 0 for the VOP input, the remote endpoint maybe vopb or vopl > - video port 1 for either a panel or subsequent encoder > >-the lvds panel described by >- Documentation/devicetree/bindings/display/panel/simple-panel.txt >- >-Panel required properties: >-- ports for remote LVDS output >- >-Panel optional properties: >-- data-mapping: should be "vesa-24","jeida-24" or "jeida-18". >-This describes decribed by: >- Documentation/devicetree/bindings/display/panel/panel-lvds.txt >- > Example: > > lvds_panel: lvds-panel { >diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/ssd1307fb.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/ssd1307fb.txt >--- linux-5.3-rc6/Documentation/devicetree/bindings/display/ssd1307fb.txt 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/ssd1307fb.txt 2019-08-31 15:01:11.824736165 -0500 >@@ -27,6 +27,15 @@ > - solomon,prechargep2: Length of precharge period (phase 2) in clock cycles. > This needs to be the higher, the higher the capacitance > of the OLED's pixels is >+ - solomon,dclk-div: Clock divisor 1 to 16 >+ - solomon,dclk-frq: Clock frequency 0 to 15, higher value means higher >+ frequency >+ - solomon,lookup-table: 8 bit value array of current drive pulse widths for >+ BANK0, and colors A, B, and C. Each value in range >+ of 31 to 63 for pulse widths of 32 to 64. Color D >+ is always width 64. >+ - solomon,area-color-enable: Display uses color mode >+ - solomon,low-power. 
Display runs in low power mode > > [0]: Documentation/devicetree/bindings/pwm/pwm.txt > >@@ -46,4 +55,5 @@ > solomon,com-lrremap; > solomon,com-invdir; > solomon,com-offset = <32>; >+ solomon,lookup-table = /bits/ 8 <0x3f 0x3f 0x3f 0x3f>; > }; >diff -Naur linux-5.3-rc6/Documentation/gpu/drivers.rst linux-5.3-rc6-agd5fed/Documentation/gpu/drivers.rst >--- linux-5.3-rc6/Documentation/gpu/drivers.rst 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/gpu/drivers.rst 2019-08-31 15:01:11.824736165 -0500 >@@ -11,7 +11,6 @@ > meson > pl111 > tegra >- tinydrm > tve200 > v3d > vc4 >diff -Naur linux-5.3-rc6/Documentation/gpu/drm-kms-helpers.rst linux-5.3-rc6-agd5fed/Documentation/gpu/drm-kms-helpers.rst >--- linux-5.3-rc6/Documentation/gpu/drm-kms-helpers.rst 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/gpu/drm-kms-helpers.rst 2019-08-31 15:01:11.824736165 -0500 >@@ -263,6 +263,18 @@ > drm_dp_mst_topology_put_port > drm_dp_mst_get_mstb_malloc drm_dp_mst_put_mstb_malloc > >+MIPI DBI Helper Functions Reference >+=================================== >+ >+.. kernel-doc:: drivers/gpu/drm/drm_mipi_dbi.c >+ :doc: overview >+ >+.. kernel-doc:: include/drm/drm_mipi_dbi.h >+ :internal: >+ >+.. kernel-doc:: drivers/gpu/drm/drm_mipi_dbi.c >+ :export: >+ > MIPI DSI Helper Functions Reference > =================================== > >diff -Naur linux-5.3-rc6/Documentation/gpu/drm-mm.rst linux-5.3-rc6-agd5fed/Documentation/gpu/drm-mm.rst >--- linux-5.3-rc6/Documentation/gpu/drm-mm.rst 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/gpu/drm-mm.rst 2019-08-31 15:01:11.825736165 -0500 >@@ -433,43 +433,11 @@ > created for the OPTIMUS range of multi-gpu platforms. To userspace PRIME > buffers are dma-buf based file descriptors. > >-Overview and Driver Interface >------------------------------ >+Overview and Lifetime Rules >+--------------------------- > >-Similar to GEM global names, PRIME file descriptors are also used to >-share buffer objects across processes. They offer additional security: >-as file descriptors must be explicitly sent over UNIX domain sockets to >-be shared between applications, they can't be guessed like the globally >-unique GEM names. >- >-Drivers that support the PRIME API must set the DRIVER_PRIME bit in the >-struct :c:type:`struct drm_driver <drm_driver>` >-driver_features field, and implement the prime_handle_to_fd and >-prime_fd_to_handle operations. >- >-int (\*prime_handle_to_fd)(struct drm_device \*dev, struct drm_file >-\*file_priv, uint32_t handle, uint32_t flags, int \*prime_fd); int >-(\*prime_fd_to_handle)(struct drm_device \*dev, struct drm_file >-\*file_priv, int prime_fd, uint32_t \*handle); Those two operations >-convert a handle to a PRIME file descriptor and vice versa. Drivers must >-use the kernel dma-buf buffer sharing framework to manage the PRIME file >-descriptors. Similar to the mode setting API PRIME is agnostic to the >-underlying buffer object manager, as long as handles are 32bit unsigned >-integers. >- >-While non-GEM drivers must implement the operations themselves, GEM >-drivers must use the :c:func:`drm_gem_prime_handle_to_fd()` and >-:c:func:`drm_gem_prime_fd_to_handle()` helper functions. Those >-helpers rely on the driver gem_prime_export and gem_prime_import >-operations to create a dma-buf instance from a GEM object (dma-buf >-exporter role) and to create a GEM object from a dma-buf instance >-(dma-buf importer role). 
>- >-struct dma_buf \* (\*gem_prime_export)(struct drm_device \*dev, >-struct drm_gem_object \*obj, int flags); struct drm_gem_object \* >-(\*gem_prime_import)(struct drm_device \*dev, struct dma_buf >-\*dma_buf); These two operations are mandatory for GEM drivers that >-support PRIME. >+.. kernel-doc:: drivers/gpu/drm/drm_prime.c >+ :doc: overview and lifetime rules > > PRIME Helper Functions > ---------------------- >diff -Naur linux-5.3-rc6/Documentation/gpu/i915.rst linux-5.3-rc6-agd5fed/Documentation/gpu/i915.rst >--- linux-5.3-rc6/Documentation/gpu/i915.rst 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/gpu/i915.rst 2019-08-31 15:01:11.825736165 -0500 >@@ -430,31 +430,31 @@ > GuC > === > >+Firmware Layout >+------------------- >+ >+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h >+ :doc: Firmware Layout >+ > GuC-specific firmware loader > ---------------------------- > >-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c >+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c > :internal: > > GuC-based command submission > ---------------------------- > >-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_submission.c >+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > :doc: GuC-based command submission > >-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_submission.c >+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > :internal: > >-GuC Firmware Layout >-------------------- >- >-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fwif.h >- :doc: GuC Firmware Layout >- > GuC Address Space > ----------------- > >-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc.c >+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c > :doc: GuC Address Space > > Tracing >diff -Naur linux-5.3-rc6/Documentation/gpu/introduction.rst linux-5.3-rc6-agd5fed/Documentation/gpu/introduction.rst >--- linux-5.3-rc6/Documentation/gpu/introduction.rst 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/gpu/introduction.rst 2019-08-31 15:01:11.825736165 -0500 >@@ -51,6 +51,22 @@ > > Also read the :ref:`guidelines for the kernel documentation at large <doc_guide>`. > >+Documentation Requirements for kAPI >+----------------------------------- >+ >+All kernel APIs exported to other modules must be documented, including their >+datastructures and at least a short introductory section explaining the overall >+concepts. Documentation should be put into the code itself as kerneldoc comments >+as much as reasonable. >+ >+Do not blindly document everything, but document only what's relevant for driver >+authors: Internal functions of drm.ko and definitely static functions should not >+have formal kerneldoc comments. Use normal C comments if you feel like a comment >+is warranted. You may use kerneldoc syntax in the comment, but it shall not >+start with a /** kerneldoc marker. Similar for data structures, annotate >+anything entirely private with ``/* private: */`` comments as per the >+documentation guide. 
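>+
>+For example, an exported function would carry a comment in the usual
>+kerneldoc shape (the function here is made up for illustration)::
>+
>+  /**
>+   * drm_demo_enable() - enable the demo widget
>+   * @demo: widget to enable
>+   *
>+   * Longer discussion of semantics, locking rules and interactions with
>+   * other interfaces goes here.
>+   *
>+   * Returns:
>+   * 0 on success or a negative error code on failure.
>+   */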
>+ > Getting Started > =============== > >diff -Naur linux-5.3-rc6/Documentation/gpu/tinydrm.rst linux-5.3-rc6-agd5fed/Documentation/gpu/tinydrm.rst >--- linux-5.3-rc6/Documentation/gpu/tinydrm.rst 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/gpu/tinydrm.rst 1969-12-31 18:00:00.000000000 -0600 >@@ -1,30 +0,0 @@ >-============================ >-drm/tinydrm Tiny DRM drivers >-============================ >- >-tinydrm is a collection of DRM drivers that are so small they can fit in a >-single source file. >- >-Helpers >-======= >- >-.. kernel-doc:: include/drm/tinydrm/tinydrm-helpers.h >- :internal: >- >-.. kernel-doc:: drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c >- :export: >- >-.. kernel-doc:: drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c >- :export: >- >-MIPI DBI Compatible Controllers >-=============================== >- >-.. kernel-doc:: drivers/gpu/drm/tinydrm/mipi-dbi.c >- :doc: overview >- >-.. kernel-doc:: include/drm/tinydrm/mipi-dbi.h >- :internal: >- >-.. kernel-doc:: drivers/gpu/drm/tinydrm/mipi-dbi.c >- :export: >diff -Naur linux-5.3-rc6/Documentation/gpu/todo.rst linux-5.3-rc6-agd5fed/Documentation/gpu/todo.rst >--- linux-5.3-rc6/Documentation/gpu/todo.rst 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/gpu/todo.rst 2019-08-31 15:01:11.825736165 -0500 >@@ -162,7 +162,7 @@ > > A lot of drivers forward gem mmap calls to dma-buf mmap for imported buffers. > And also a lot of them forward dma-buf mmap to the gem mmap implementations. >-Would be great to refactor this all into a set of small common helpers. >+There's drm_gem_prime_mmap() for this now, but still needs to be rolled out. > > Contact: Daniel Vetter > >@@ -196,15 +196,6 @@ > > Contact: Daniel Vetter, Noralf Tronnes > >-Remove the ->gem_prime_res_obj callback >--------------------------------------------- >- >-The ->gem_prime_res_obj callback can be removed from drivers by using the >-reservation_object in the drm_gem_object. It may also be possible to use the >-generic drm_gem_reservation_object_wait helper for waiting for a bo. >- >-Contact: Daniel Vetter >- > idr_init_base() > --------------- > >@@ -215,22 +206,13 @@ > > Contact: Daniel Vetter > >-Defaults for .gem_prime_import and export >------------------------------------------ >- >-Most drivers don't need to set drm_driver->gem_prime_import and >-->gem_prime_export now that drm_gem_prime_import() and drm_gem_prime_export() >-are the default. >- > struct drm_gem_object_funcs > --------------------------- > > GEM objects can now have a function table instead of having the callbacks on the > DRM driver struct. This is now the preferred way and drivers can be moved over. > >-DRM_GEM_CMA_VMAP_DRIVER_OPS, DRM_GEM_SHMEM_DRIVER_OPS already support this, but >-DRM_GEM_VRAM_DRIVER_PRIME does not yet and needs to be aligned with the previous >-two. We also need a 2nd version of the CMA define that doesn't require the >+We also need a 2nd version of the CMA define that doesn't require the > vmapping to be present (different hook for prime importing). Plus this needs to > be rolled out to all drivers using their own implementations, too. > >@@ -317,19 +299,6 @@ > > Contact: Daniel Vetter > >-Add missing kerneldoc for exported functions >--------------------------------------------- >- >-The DRM reference documentation is still lacking kerneldoc in a few areas. 
The >-task would be to clean up interfaces like moving functions around between >-files to better group them and improving the interfaces like dropping return >-values for functions that never fail. Then write kerneldoc for all exported >-functions and an overview section and integrate it all into the drm book. >- >-See https://dri.freedesktop.org/docs/drm/ for what's there already. >- >-Contact: Daniel Vetter >- > Make panic handling work > ------------------------ > >@@ -393,6 +362,9 @@ > this (together with the drm_minor->drm_device move) would allow us to remove > debugfs_init. > >+- Drop the return code and error checking from all debugfs functions. Greg KH is >+ working on this already. >+ > Contact: Daniel Vetter > > KMS cleanups >@@ -440,38 +412,21 @@ > > Contact: Daniel Vetter > >-Driver Specific >-=============== >+Backlight Refactoring >+--------------------- > >-tinydrm >-------- >+Backlight drivers have a triple enable/disable state, which is a bit overkill. >+Plan to fix this: > >-Tinydrm is the helper driver for really simple fb drivers. The goal is to make >-those drivers as simple as possible, so lots of room for refactoring: >+1. Roll out backlight_enable() and backlight_disable() helpers everywhere. This >+ has started already. >+2. In all, only look at one of the three status bits set by the above helpers. >+3. Remove the other two status bits. > >-- backlight helpers, probably best to put them into a new drm_backlight.c. >- This is because drivers/video is de-facto unmaintained. We could also >- move drivers/video/backlight to drivers/gpu/backlight and take it all >- over within drm-misc, but that's more work. Backlight helpers require a fair >- bit of reworking and refactoring. A simple example is the enabling of a backlight. >- Tinydrm has helpers for this. It would be good if other drivers can also use the >- helper. However, there are various cases we need to consider i.e different >- drivers seem to have different ways of enabling/disabling a backlight. >- We also need to consider the backlight drivers (like gpio_backlight). The situation >- is further complicated by the fact that the backlight is tied to fbdev >- via fb_notifier_callback() which has complicated logic. For further details, refer >- to the following discussion thread: >- https://groups.google.com/forum/#!topic/outreachy-kernel/8rBe30lwtdA >- >-- spi helpers, probably best put into spi core/helper code. Thierry said >- the spi maintainer is fast&reactive, so shouldn't be a big issue. >- >-- extract the mipi-dbi helper (well, the non-tinydrm specific parts at >- least) into a separate helper, like we have for mipi-dsi already. Or follow >- one of the ideas for having a shared dsi/dbi helper, abstracting away the >- transport details more. >+Contact: Daniel Vetter > >-Contact: Noralf Trønnes, Daniel Vetter >+Driver Specific >+=============== > > AMD DC Display Driver > --------------------- >diff -Naur linux-5.3-rc6/Documentation/media/uapi/v4l/subdev-formats.rst linux-5.3-rc6-agd5fed/Documentation/media/uapi/v4l/subdev-formats.rst >--- linux-5.3-rc6/Documentation/media/uapi/v4l/subdev-formats.rst 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/Documentation/media/uapi/v4l/subdev-formats.rst 2019-08-31 15:01:11.825736165 -0500 >@@ -1305,6 +1305,113 @@ > - g\ :sub:`6` > - g\ :sub:`5` > - g\ :sub:`4` >+ * .. 
_MEDIA-BUS-FMT-RGB888-3X8: >+ >+ - MEDIA_BUS_FMT_RGB888_3X8 >+ - 0x101c >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - r\ :sub:`7` >+ - r\ :sub:`6` >+ - r\ :sub:`5` >+ - r\ :sub:`4` >+ - r\ :sub:`3` >+ - r\ :sub:`2` >+ - r\ :sub:`1` >+ - r\ :sub:`0` >+ * - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - g\ :sub:`7` >+ - g\ :sub:`6` >+ - g\ :sub:`5` >+ - g\ :sub:`4` >+ - g\ :sub:`3` >+ - g\ :sub:`2` >+ - g\ :sub:`1` >+ - g\ :sub:`0` >+ * - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - >+ - b\ :sub:`7` >+ - b\ :sub:`6` >+ - b\ :sub:`5` >+ - b\ :sub:`4` >+ - b\ :sub:`3` >+ - b\ :sub:`2` >+ - b\ :sub:`1` >+ - b\ :sub:`0` > * .. _MEDIA-BUS-FMT-ARGB888-1X32: > > - MEDIA_BUS_FMT_ARGB888_1X32 >diff -Naur linux-5.3-rc6/drivers/dma-buf/dma-fence-chain.c linux-5.3-rc6-agd5fed/drivers/dma-buf/dma-fence-chain.c >--- linux-5.3-rc6/drivers/dma-buf/dma-fence-chain.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/dma-buf/dma-fence-chain.c 2019-08-31 15:01:11.833736166 -0500 >@@ -178,8 +178,30 @@ > static void dma_fence_chain_release(struct dma_fence *fence) > { > struct dma_fence_chain *chain = to_dma_fence_chain(fence); >+ struct dma_fence *prev; >+ >+ /* Manually unlink the chain as much as possible to avoid recursion >+ * and potential stack overflow. >+ */ >+ while ((prev = rcu_dereference_protected(chain->prev, true))) { >+ struct dma_fence_chain *prev_chain; >+ >+ if (kref_read(&prev->refcount) > 1) >+ break; >+ >+ prev_chain = to_dma_fence_chain(prev); >+ if (!prev_chain) >+ break; >+ >+ /* No need for atomic operations since we hold the last >+ * reference to prev_chain. >+ */ >+ chain->prev = prev_chain->prev; >+ RCU_INIT_POINTER(prev_chain->prev, NULL); >+ dma_fence_put(prev); >+ } >+ dma_fence_put(prev); > >- dma_fence_put(rcu_dereference_protected(chain->prev, true)); > dma_fence_put(chain->fence); > dma_fence_free(fence); > } >diff -Naur linux-5.3-rc6/drivers/dma-buf/reservation.c linux-5.3-rc6-agd5fed/drivers/dma-buf/reservation.c >--- linux-5.3-rc6/drivers/dma-buf/reservation.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/dma-buf/reservation.c 2019-08-31 15:01:11.838736166 -0500 >@@ -56,6 +56,85 @@ > EXPORT_SYMBOL(reservation_seqcount_string); > > /** >+ * reservation_object_list_alloc - allocate fence list >+ * @shared_max: number of fences we need space for >+ * >+ * Allocate a new reservation_object_list and make sure to correctly initialize >+ * shared_max. >+ */ >+static struct reservation_object_list * >+reservation_object_list_alloc(unsigned int shared_max) >+{ >+ struct reservation_object_list *list; >+ >+ list = kmalloc(offsetof(typeof(*list), shared[shared_max]), GFP_KERNEL); >+ if (!list) >+ return NULL; >+ >+ list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) / >+ sizeof(*list->shared); >+ >+ return list; >+} >+ >+/** >+ * reservation_object_list_free - free fence list >+ * @list: list to free >+ * >+ * Free a reservation_object_list and make sure to drop all references. 
>+ */ >+static void reservation_object_list_free(struct reservation_object_list *list) >+{ >+ unsigned int i; >+ >+ if (!list) >+ return; >+ >+ for (i = 0; i < list->shared_count; ++i) >+ dma_fence_put(rcu_dereference_protected(list->shared[i], true)); >+ >+ kfree_rcu(list, rcu); >+} >+ >+/** >+ * reservation_object_init - initialize a reservation object >+ * @obj: the reservation object >+ */ >+void reservation_object_init(struct reservation_object *obj) >+{ >+ ww_mutex_init(&obj->lock, &reservation_ww_class); >+ >+ __seqcount_init(&obj->seq, reservation_seqcount_string, >+ &reservation_seqcount_class); >+ RCU_INIT_POINTER(obj->fence, NULL); >+ RCU_INIT_POINTER(obj->fence_excl, NULL); >+} >+EXPORT_SYMBOL(reservation_object_init); >+ >+/** >+ * reservation_object_fini - destroys a reservation object >+ * @obj: the reservation object >+ */ >+void reservation_object_fini(struct reservation_object *obj) >+{ >+ struct reservation_object_list *fobj; >+ struct dma_fence *excl; >+ >+ /* >+ * This object should be dead and all references must have >+ * been released to it, so no need to be protected with rcu. >+ */ >+ excl = rcu_dereference_protected(obj->fence_excl, 1); >+ if (excl) >+ dma_fence_put(excl); >+ >+ fobj = rcu_dereference_protected(obj->fence, 1); >+ reservation_object_list_free(fobj); >+ ww_mutex_destroy(&obj->lock); >+} >+EXPORT_SYMBOL(reservation_object_fini); >+ >+/** > * reservation_object_reserve_shared - Reserve space to add shared fences to > * a reservation_object. > * @obj: reservation object >@@ -87,7 +166,7 @@ > max = 4; > } > >- new = kmalloc(offsetof(typeof(*new), shared[max]), GFP_KERNEL); >+ new = reservation_object_list_alloc(max); > if (!new) > return -ENOMEM; > >@@ -108,23 +187,22 @@ > RCU_INIT_POINTER(new->shared[j++], fence); > } > new->shared_count = j; >- new->shared_max = max; > >- preempt_disable(); >- write_seqcount_begin(&obj->seq); > /* >- * RCU_INIT_POINTER can be used here, >- * seqcount provides the necessary barriers >+ * We are not changing the effective set of fences here so can >+ * merely update the pointer to the new array; both existing >+ * readers and new readers will see exactly the same set of >+ * active (unsignaled) shared fences. Individual fences and the >+ * old array are protected by RCU and so will not vanish under >+ * the gaze of the rcu_read_lock() readers. 
> */
>- RCU_INIT_POINTER(obj->fence, new);
>- write_seqcount_end(&obj->seq);
>- preempt_enable();
>+ rcu_assign_pointer(obj->fence, new);
> 
> if (!old)
> return 0;
> 
> /* Drop the references to the signaled fences */
>- for (i = k; i < new->shared_max; ++i) {
>+ for (i = k; i < max; ++i) {
> struct dma_fence *fence;
> 
> fence = rcu_dereference_protected(new->shared[i],
>@@ -149,6 +227,7 @@
> struct dma_fence *fence)
> {
> struct reservation_object_list *fobj;
>+ struct dma_fence *old;
> unsigned int i, count;
> 
> dma_fence_get(fence);
>@@ -162,18 +241,16 @@
> write_seqcount_begin(&obj->seq);
> 
> for (i = 0; i < count; ++i) {
>- struct dma_fence *old_fence;
> 
>- old_fence = rcu_dereference_protected(fobj->shared[i],
>- reservation_object_held(obj));
>- if (old_fence->context == fence->context ||
>- dma_fence_is_signaled(old_fence)) {
>- dma_fence_put(old_fence);
>+ old = rcu_dereference_protected(fobj->shared[i],
>+ reservation_object_held(obj));
>+ if (old->context == fence->context ||
>+ dma_fence_is_signaled(old))
> goto replace;
>- }
> }
> 
> BUG_ON(fobj->shared_count >= fobj->shared_max);
>+ old = NULL;
> count++;
> 
> replace:
>@@ -183,6 +260,7 @@
> 
> write_seqcount_end(&obj->seq);
> preempt_enable();
>+ dma_fence_put(old);
> }
> EXPORT_SYMBOL(reservation_object_add_shared_fence);
> 
>@@ -239,7 +317,6 @@
> {
> struct reservation_object_list *src_list, *dst_list;
> struct dma_fence *old, *new;
>- size_t size;
> unsigned i;
> 
> reservation_object_assert_held(dst);
>@@ -251,10 +328,9 @@
> if (src_list) {
> unsigned shared_count = src_list->shared_count;
> 
>- size = offsetof(typeof(*src_list), shared[shared_count]);
> rcu_read_unlock();
> 
>- dst_list = kmalloc(size, GFP_KERNEL);
>+ dst_list = reservation_object_list_alloc(shared_count);
> if (!dst_list)
> return -ENOMEM;
> 
>@@ -266,7 +342,6 @@
> }
> 
> dst_list->shared_count = 0;
>- dst_list->shared_max = shared_count;
> for (i = 0; i < src_list->shared_count; ++i) {
> struct dma_fence *fence;
> 
>@@ -276,7 +351,7 @@
> continue;
> 
> if (!dma_fence_get_rcu(fence)) {
>- kfree(dst_list);
>+ reservation_object_list_free(dst_list);
> src_list = rcu_dereference(src->fence);
> goto retry;
> }
>@@ -306,8 +381,7 @@
> write_seqcount_end(&dst->seq);
> preempt_enable();
> 
>- if (src_list)
>- kfree_rcu(src_list, rcu);
>+ reservation_object_list_free(src_list);
> dma_fence_put(old);
> 
> return 0;
>@@ -385,13 +459,6 @@
> if (!dma_fence_get_rcu(shared[i]))
> break;
> }
>-
>- if (!pfence_excl && fence_excl) {
>- shared[i] = fence_excl;
>- fence_excl = NULL;
>- ++i;
>- ++shared_count;
>- }
> }
> 
> if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
>@@ -406,6 +473,11 @@
> rcu_read_unlock();
> } while (ret);
> 
>+ if (pfence_excl)
>+ *pfence_excl = fence_excl;
>+ else if (fence_excl)
>+ shared[shared_count++] = fence_excl; /* append after the copied fences */
>+
> if (!shared_count) {
> kfree(shared);
> shared = NULL;
>@@ -413,9 +485,6 @@
> 
> *pshared_count = shared_count;
> *pshared = shared;
>- if (pfence_excl)
>- *pfence_excl = fence_excl;
>-
> return ret;
> }
> EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu);
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 1969-12-31 18:00:00.000000000 -0600
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 2019-08-31 15:01:11.839736167 -0500
>@@ -0,0 +1,323 @@
>+/*
>+ * Copyright 2019 Advanced Micro Devices, Inc. 
>+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. >+ */ >+ >+#undef pr_fmt >+#define pr_fmt(fmt) "kfd2kgd: " fmt >+ >+#include <linux/module.h> >+#include <linux/fdtable.h> >+#include <linux/uaccess.h> >+#include <linux/mmu_context.h> >+#include <linux/firmware.h> >+#include "amdgpu.h" >+#include "amdgpu_amdkfd.h" >+#include "sdma0/sdma0_4_2_2_offset.h" >+#include "sdma0/sdma0_4_2_2_sh_mask.h" >+#include "sdma1/sdma1_4_2_2_offset.h" >+#include "sdma1/sdma1_4_2_2_sh_mask.h" >+#include "sdma2/sdma2_4_2_2_offset.h" >+#include "sdma2/sdma2_4_2_2_sh_mask.h" >+#include "sdma3/sdma3_4_2_2_offset.h" >+#include "sdma3/sdma3_4_2_2_sh_mask.h" >+#include "sdma4/sdma4_4_2_2_offset.h" >+#include "sdma4/sdma4_4_2_2_sh_mask.h" >+#include "sdma5/sdma5_4_2_2_offset.h" >+#include "sdma5/sdma5_4_2_2_sh_mask.h" >+#include "sdma6/sdma6_4_2_2_offset.h" >+#include "sdma6/sdma6_4_2_2_sh_mask.h" >+#include "sdma7/sdma7_4_2_2_offset.h" >+#include "sdma7/sdma7_4_2_2_sh_mask.h" >+#include "v9_structs.h" >+#include "soc15.h" >+#include "soc15d.h" >+#include "amdgpu_amdkfd_gfx_v9.h" >+ >+#define HQD_N_REGS 56 >+#define DUMP_REG(addr) do { \ >+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ >+ break; \ >+ (*dump)[i][0] = (addr) << 2; \ >+ (*dump)[i++][1] = RREG32(addr); \ >+ } while (0) >+ >+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) >+{ >+ return (struct amdgpu_device *)kgd; >+} >+ >+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) >+{ >+ return (struct v9_sdma_mqd *)mqd; >+} >+ >+static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, >+ unsigned int engine_id, >+ unsigned int queue_id) >+{ >+ uint32_t base[8] = { >+ SOC15_REG_OFFSET(SDMA0, 0, >+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, >+ SOC15_REG_OFFSET(SDMA1, 0, >+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, >+ SOC15_REG_OFFSET(SDMA2, 0, >+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, >+ SOC15_REG_OFFSET(SDMA3, 0, >+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, >+ SOC15_REG_OFFSET(SDMA4, 0, >+ mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, >+ SOC15_REG_OFFSET(SDMA5, 0, >+ mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, >+ SOC15_REG_OFFSET(SDMA6, 0, >+ mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, >+ SOC15_REG_OFFSET(SDMA7, 0, >+ mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL >+ }; >+ uint32_t retval; >+ >+ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - >+ mmSDMA0_RLC0_RB_CNTL); >+ >+ pr_debug("sdma base address: 0x%x\n", retval); >+ >+ return retval; >+} >+ 
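>+/*
>+ * Arcturus has eight SDMA instances, each with its own register block;
>+ * look up the base of the given instance in the per-IP reg_offset
>+ * tables so the RLC queue registers can be addressed uniformly on any
>+ * engine.
>+ */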
>+static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, >+ u32 instance, u32 offset) >+{ >+ switch (instance) { >+ case 0: >+ return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); >+ case 1: >+ return (adev->reg_offset[SDMA1_HWIP][0][1] + offset); >+ case 2: >+ return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); >+ case 3: >+ return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); >+ case 4: >+ return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); >+ case 5: >+ return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); >+ case 6: >+ return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); >+ case 7: >+ return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); >+ default: >+ break; >+ } >+ return 0; >+} >+ >+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, >+ uint32_t __user *wptr, struct mm_struct *mm) >+{ >+ struct amdgpu_device *adev = get_amdgpu_device(kgd); >+ struct v9_sdma_mqd *m; >+ uint32_t sdma_base_addr, sdmax_gfx_context_cntl; >+ unsigned long end_jiffies; >+ uint32_t data; >+ uint64_t data64; >+ uint64_t __user *wptr64 = (uint64_t __user *)wptr; >+ >+ m = get_sdma_mqd(mqd); >+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, >+ m->sdma_queue_id); >+ sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev, >+ m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL); >+ >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, >+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); >+ >+ end_jiffies = msecs_to_jiffies(2000) + jiffies; >+ while (true) { >+ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); >+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) >+ break; >+ if (time_after(jiffies, end_jiffies)) >+ return -ETIME; >+ usleep_range(500, 1000); >+ } >+ data = RREG32(sdmax_gfx_context_cntl); >+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, >+ RESUME_CTX, 0); >+ WREG32(sdmax_gfx_context_cntl, data); >+ >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, >+ m->sdmax_rlcx_doorbell_offset); >+ >+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, >+ ENABLE, 1); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, >+ m->sdmax_rlcx_rb_rptr_hi); >+ >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); >+ if (read_user_wptr(mm, wptr64, data64)) { >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, >+ lower_32_bits(data64)); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, >+ upper_32_bits(data64)); >+ } else { >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, >+ m->sdmax_rlcx_rb_rptr); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, >+ m->sdmax_rlcx_rb_rptr_hi); >+ } >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); >+ >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, >+ m->sdmax_rlcx_rb_base_hi); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, >+ m->sdmax_rlcx_rb_rptr_addr_lo); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, >+ m->sdmax_rlcx_rb_rptr_addr_hi); >+ >+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, >+ RB_ENABLE, 1); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); >+ >+ return 0; >+} >+ >+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, >+ uint32_t engine_id, uint32_t queue_id, >+ uint32_t (**dump)[2], uint32_t *n_regs) >+{ >+ struct amdgpu_device *adev = get_amdgpu_device(kgd); >+ uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); >+ 
uint32_t i = 0, reg; >+#undef HQD_N_REGS >+#define HQD_N_REGS (19+6+7+10) >+ >+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); >+ if (*dump == NULL) >+ return -ENOMEM; >+ >+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) >+ DUMP_REG(sdma_base_addr + reg); >+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) >+ DUMP_REG(sdma_base_addr + reg); >+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; >+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) >+ DUMP_REG(sdma_base_addr + reg); >+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; >+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) >+ DUMP_REG(sdma_base_addr + reg); >+ >+ WARN_ON_ONCE(i != HQD_N_REGS); >+ *n_regs = i; >+ >+ return 0; >+} >+ >+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) >+{ >+ struct amdgpu_device *adev = get_amdgpu_device(kgd); >+ struct v9_sdma_mqd *m; >+ uint32_t sdma_base_addr; >+ uint32_t sdma_rlc_rb_cntl; >+ >+ m = get_sdma_mqd(mqd); >+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, >+ m->sdma_queue_id); >+ >+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); >+ >+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) >+ return true; >+ >+ return false; >+} >+ >+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, >+ unsigned int utimeout) >+{ >+ struct amdgpu_device *adev = get_amdgpu_device(kgd); >+ struct v9_sdma_mqd *m; >+ uint32_t sdma_base_addr; >+ uint32_t temp; >+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; >+ >+ m = get_sdma_mqd(mqd); >+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, >+ m->sdma_queue_id); >+ >+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); >+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); >+ >+ while (true) { >+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); >+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) >+ break; >+ if (time_after(jiffies, end_jiffies)) >+ return -ETIME; >+ usleep_range(500, 1000); >+ } >+ >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); >+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, >+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | >+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); >+ >+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); >+ m->sdmax_rlcx_rb_rptr_hi = >+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); >+ >+ return 0; >+} >+ >+static const struct kfd2kgd_calls kfd2kgd = { >+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, >+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, >+ .init_interrupts = kgd_gfx_v9_init_interrupts, >+ .hqd_load = kgd_gfx_v9_hqd_load, >+ .hqd_sdma_load = kgd_hqd_sdma_load, >+ .hqd_dump = kgd_gfx_v9_hqd_dump, >+ .hqd_sdma_dump = kgd_hqd_sdma_dump, >+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, >+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, >+ .hqd_destroy = kgd_gfx_v9_hqd_destroy, >+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy, >+ .address_watch_disable = kgd_gfx_v9_address_watch_disable, >+ .address_watch_execute = kgd_gfx_v9_address_watch_execute, >+ .wave_control_execute = kgd_gfx_v9_wave_control_execute, >+ .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, >+ .get_atc_vmid_pasid_mapping_pasid = >+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, >+ .get_atc_vmid_pasid_mapping_valid = >+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, >+ .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, >+ .get_tile_config = kgd_gfx_v9_get_tile_config, >+ 
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, >+ .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, >+ .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, >+ .get_hive_id = amdgpu_amdkfd_get_hive_id, >+}; >+ >+struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) >+{ >+ return (struct kfd2kgd_calls *)&kfd2kgd; >+} >+ >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 2019-08-31 15:01:11.839736167 -0500 >@@ -87,7 +87,12 @@ > case CHIP_RAVEN: > kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); > break; >+ case CHIP_ARCTURUS: >+ kfd2kgd = amdgpu_amdkfd_arcturus_get_functions(); >+ break; > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions(); > break; > default: >@@ -651,8 +656,12 @@ > { > struct amdgpu_device *adev = (struct amdgpu_device *)kgd; > >- if (adev->powerplay.pp_funcs && >- adev->powerplay.pp_funcs->switch_power_profile) >+ if (is_support_sw_smu(adev)) >+ smu_switch_power_profile(&adev->smu, >+ PP_SMC_POWER_PROFILE_COMPUTE, >+ !idle); >+ else if (adev->powerplay.pp_funcs && >+ adev->powerplay.pp_funcs->switch_power_profile) > amdgpu_dpm_switch_power_profile(adev, > PP_SMC_POWER_PROFILE_COMPUTE, > !idle); >@@ -714,6 +723,11 @@ > { > return NULL; > } >+ >+struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) >+{ >+ return NULL; >+} > > struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void) > { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 2019-08-31 15:01:11.839736167 -0500 >@@ -27,7 +27,6 @@ > #include <linux/uaccess.h> > #include <linux/firmware.h> > #include <linux/mmu_context.h> >-#include <drm/drmP.h> > #include "amdgpu.h" > #include "amdgpu_amdkfd.h" > #include "amdgpu_ucode.h" >@@ -802,42 +801,6 @@ > return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; > } > >-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) >-{ >- struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >- uint32_t req = (1 << vmid) | >- (0 << GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */ >- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK | >- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK | >- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK | >- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK | >- GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK; >- >- mutex_lock(&adev->srbm_mutex); >- >- /* Use light weight invalidation. >- * >- * TODO 1: agree on the right set of invalidation registers for >- * KFD use. Use the last one for now. 
Invalidate only GCHUB as >- * SDMA is now moved to GCHUB >- * >- * TODO 2: support range-based invalidation, requires kfg2kgd >- * interface change >- */ >- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32), >- 0xffffffff); >- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32), >- 0x0000001f); >- >- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req); >- >- while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) & >- (1 << vmid))) >- cpu_relax(); >- >- mutex_unlock(&adev->srbm_mutex); >-} >- > static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) > { > signed long r; >@@ -878,7 +841,8 @@ > if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { > if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) > == pasid) { >- write_vmid_invalidate_request(kgd, vmid); >+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, >+ AMDGPU_GFXHUB_0, 0); > break; > } > } >@@ -896,7 +860,7 @@ > return 0; > } > >- write_vmid_invalidate_request(kgd, vmid); >+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); > return 0; > } > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 2019-08-31 15:01:11.839736167 -0500 >@@ -47,6 +47,7 @@ > #include "soc15d.h" > #include "mmhub_v1_0.h" > #include "gfxhub_v1_0.h" >+#include "gmc_v9_0.h" > > > #define V9_PIPE_PER_MEC (4) >@@ -58,66 +59,11 @@ > RESET_WAVES > }; > >-/* >- * Register access functions >- */ >- >-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, >- uint32_t sh_mem_config, >- uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, >- uint32_t sh_mem_bases); >-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, >- unsigned int vmid); >-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); >-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, >- uint32_t queue_id, uint32_t __user *wptr, >- uint32_t wptr_shift, uint32_t wptr_mask, >- struct mm_struct *mm); >-static int kgd_hqd_dump(struct kgd_dev *kgd, >- uint32_t pipe_id, uint32_t queue_id, >- uint32_t (**dump)[2], uint32_t *n_regs); >-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, >- uint32_t __user *wptr, struct mm_struct *mm); >-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, >- uint32_t engine_id, uint32_t queue_id, >- uint32_t (**dump)[2], uint32_t *n_regs); >-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, >- uint32_t pipe_id, uint32_t queue_id); >-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); >-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, >- enum kfd_preempt_type reset_type, >- unsigned int utimeout, uint32_t pipe_id, >- uint32_t queue_id); >-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, >- unsigned int utimeout); >-static int kgd_address_watch_disable(struct kgd_dev *kgd); >-static int kgd_address_watch_execute(struct kgd_dev *kgd, >- unsigned int watch_point_id, >- uint32_t cntl_val, >- uint32_t addr_hi, >- uint32_t addr_lo); >-static int kgd_wave_control_execute(struct kgd_dev *kgd, >- uint32_t gfx_index_val, >- uint32_t sq_cmd); >-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, >- unsigned int watch_point_id, >- unsigned int reg_offset); >- >-static 
bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, >- uint8_t vmid); >-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, >- uint8_t vmid); >-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, >- uint64_t page_table_base); >-static void set_scratch_backing_va(struct kgd_dev *kgd, >- uint64_t va, uint32_t vmid); >-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); >-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); > > /* Because of REG_GET_FIELD() being used, we put this function in the > * asic specific file. > */ >-static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, >+int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, > struct tile_config *config) > { > struct amdgpu_device *adev = (struct amdgpu_device *)kgd; >@@ -135,39 +81,6 @@ > return 0; > } > >-static const struct kfd2kgd_calls kfd2kgd = { >- .program_sh_mem_settings = kgd_program_sh_mem_settings, >- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, >- .init_interrupts = kgd_init_interrupts, >- .hqd_load = kgd_hqd_load, >- .hqd_sdma_load = kgd_hqd_sdma_load, >- .hqd_dump = kgd_hqd_dump, >- .hqd_sdma_dump = kgd_hqd_sdma_dump, >- .hqd_is_occupied = kgd_hqd_is_occupied, >- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, >- .hqd_destroy = kgd_hqd_destroy, >- .hqd_sdma_destroy = kgd_hqd_sdma_destroy, >- .address_watch_disable = kgd_address_watch_disable, >- .address_watch_execute = kgd_address_watch_execute, >- .wave_control_execute = kgd_wave_control_execute, >- .address_watch_get_offset = kgd_address_watch_get_offset, >- .get_atc_vmid_pasid_mapping_pasid = >- get_atc_vmid_pasid_mapping_pasid, >- .get_atc_vmid_pasid_mapping_valid = >- get_atc_vmid_pasid_mapping_valid, >- .set_scratch_backing_va = set_scratch_backing_va, >- .get_tile_config = amdgpu_amdkfd_get_tile_config, >- .set_vm_context_page_table_base = set_vm_context_page_table_base, >- .invalidate_tlbs = invalidate_tlbs, >- .invalidate_tlbs_vmid = invalidate_tlbs_vmid, >- .get_hive_id = amdgpu_amdkfd_get_hive_id, >-}; >- >-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) >-{ >- return (struct kfd2kgd_calls *)&kfd2kgd; >-} >- > static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) > { > return (struct amdgpu_device *)kgd; >@@ -215,7 +128,7 @@ > unlock_srbm(kgd); > } > >-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, >+void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, > uint32_t sh_mem_config, > uint32_t sh_mem_ape1_base, > uint32_t sh_mem_ape1_limit, >@@ -232,7 +145,7 @@ > unlock_srbm(kgd); > } > >-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, >+int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, > unsigned int vmid) > { > struct amdgpu_device *adev = get_amdgpu_device(kgd); >@@ -293,7 +206,7 @@ > * but still works > */ > >-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) >+int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) > { > struct amdgpu_device *adev = get_amdgpu_device(kgd); > uint32_t mec; >@@ -343,7 +256,7 @@ > return (struct v9_sdma_mqd *)mqd; > } > >-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, >+int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > uint32_t queue_id, uint32_t __user *wptr, > uint32_t wptr_shift, uint32_t wptr_mask, > struct mm_struct *mm) >@@ -438,7 +351,7 @@ > return 0; > } > >-static int kgd_hqd_dump(struct 
kgd_dev *kgd, >+int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, > uint32_t pipe_id, uint32_t queue_id, > uint32_t (**dump)[2], uint32_t *n_regs) > { >@@ -575,7 +488,7 @@ > return 0; > } > >-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, >+bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id) > { > struct amdgpu_device *adev = get_amdgpu_device(kgd); >@@ -616,7 +529,7 @@ > return false; > } > >-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, >+int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, > enum kfd_preempt_type reset_type, > unsigned int utimeout, uint32_t pipe_id, > uint32_t queue_id) >@@ -704,7 +617,7 @@ > return 0; > } > >-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, >+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, > uint8_t vmid) > { > uint32_t reg; >@@ -715,7 +628,7 @@ > return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; > } > >-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, >+uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, > uint8_t vmid) > { > uint32_t reg; >@@ -754,10 +667,10 @@ > return 0; > } > >-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) >+int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) > { > struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >- int vmid; >+ int vmid, i; > struct amdgpu_ring *ring = &adev->gfx.kiq.ring; > uint32_t flush_type = 0; > >@@ -773,11 +686,12 @@ > for (vmid = 0; vmid < 16; vmid++) { > if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) > continue; >- if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { >- if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) >+ if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { >+ if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) > == pasid) { >- amdgpu_gmc_flush_gpu_tlb(adev, vmid, >- flush_type); >+ for (i = 0; i < adev->num_vmhubs; i++) >+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, >+ i, flush_type); > break; > } > } >@@ -786,9 +700,10 @@ > return 0; > } > >-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) >+int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) > { > struct amdgpu_device *adev = (struct amdgpu_device *) kgd; >+ int i; > > if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { > pr_err("non kfd vmid %d\n", vmid); >@@ -810,16 +725,18 @@ > * TODO 2: support range-based invalidation, requires kfg2kgd > * interface change > */ >- amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); >+ for (i = 0; i < adev->num_vmhubs; i++) >+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); >+ > return 0; > } > >-static int kgd_address_watch_disable(struct kgd_dev *kgd) >+int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) > { > return 0; > } > >-static int kgd_address_watch_execute(struct kgd_dev *kgd, >+int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, > unsigned int watch_point_id, > uint32_t cntl_val, > uint32_t addr_hi, >@@ -828,7 +745,7 @@ > return 0; > } > >-static int kgd_wave_control_execute(struct kgd_dev *kgd, >+int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, > uint32_t gfx_index_val, > uint32_t sq_cmd) > { >@@ -853,14 +770,14 @@ > return 0; > } > >-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, >+uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, > unsigned int watch_point_id, > unsigned int reg_offset) > { > return 0; > } > >-static void set_scratch_backing_va(struct kgd_dev *kgd, >+void 
kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, > uint64_t va, uint32_t vmid) > { > /* No longer needed on GFXv9. The scratch base address is >@@ -869,7 +786,7 @@ > */ > } > >-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, >+void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, > uint64_t page_table_base) > { > struct amdgpu_device *adev = get_amdgpu_device(kgd); >@@ -884,7 +801,45 @@ > * now, all processes share the same address space size, like > * on GFX8 and older. > */ >- mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); >+ if (adev->asic_type == CHIP_ARCTURUS) { >+ /* Two MMHUBs */ >+ mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base); >+ mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base); >+ } else >+ mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); > > gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); > } >+ >+static const struct kfd2kgd_calls kfd2kgd = { >+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, >+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, >+ .init_interrupts = kgd_gfx_v9_init_interrupts, >+ .hqd_load = kgd_gfx_v9_hqd_load, >+ .hqd_sdma_load = kgd_hqd_sdma_load, >+ .hqd_dump = kgd_gfx_v9_hqd_dump, >+ .hqd_sdma_dump = kgd_hqd_sdma_dump, >+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, >+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, >+ .hqd_destroy = kgd_gfx_v9_hqd_destroy, >+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy, >+ .address_watch_disable = kgd_gfx_v9_address_watch_disable, >+ .address_watch_execute = kgd_gfx_v9_address_watch_execute, >+ .wave_control_execute = kgd_gfx_v9_wave_control_execute, >+ .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, >+ .get_atc_vmid_pasid_mapping_pasid = >+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, >+ .get_atc_vmid_pasid_mapping_valid = >+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, >+ .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, >+ .get_tile_config = kgd_gfx_v9_get_tile_config, >+ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, >+ .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, >+ .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, >+ .get_hive_id = amdgpu_amdkfd_get_hive_id, >+}; >+ >+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) >+{ >+ return (struct kfd2kgd_calls *)&kfd2kgd; >+} >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 2019-08-31 15:01:11.839736167 -0500 >@@ -0,0 +1,69 @@ >+/* >+ * Copyright 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. 
>+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. >+ */ >+ >+ >+ >+void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, >+ uint32_t sh_mem_config, >+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, >+ uint32_t sh_mem_bases); >+int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, >+ unsigned int vmid); >+int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); >+int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, >+ uint32_t queue_id, uint32_t __user *wptr, >+ uint32_t wptr_shift, uint32_t wptr_mask, >+ struct mm_struct *mm); >+int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, >+ uint32_t pipe_id, uint32_t queue_id, >+ uint32_t (**dump)[2], uint32_t *n_regs); >+bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, >+ uint32_t pipe_id, uint32_t queue_id); >+int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, >+ enum kfd_preempt_type reset_type, >+ unsigned int utimeout, uint32_t pipe_id, >+ uint32_t queue_id); >+int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); >+int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, >+ unsigned int watch_point_id, >+ uint32_t cntl_val, >+ uint32_t addr_hi, >+ uint32_t addr_lo); >+int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, >+ uint32_t gfx_index_val, >+ uint32_t sq_cmd); >+uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, >+ unsigned int watch_point_id, >+ unsigned int reg_offset); >+ >+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, >+ uint8_t vmid); >+uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, >+ uint8_t vmid); >+void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, >+ uint64_t page_table_base); >+void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, >+ uint64_t va, uint32_t vmid); >+int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); >+int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); >+int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, >+ struct tile_config *config); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 2019-08-31 15:01:11.840736167 -0500 >@@ -218,7 +218,7 @@ > static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, > struct amdgpu_amdkfd_fence *ef) > { >- struct reservation_object *resv = bo->tbo.resv; >+ struct reservation_object *resv = bo->tbo.base.resv; > struct reservation_object_list *old, *new; > unsigned int i, j, k; > >@@ -812,7 +812,7 @@ > struct amdgpu_bo *pd = peer_vm->root.base.bo; > > ret = amdgpu_sync_resv(NULL, >- sync, pd->tbo.resv, >+ sync, pd->tbo.base.resv, > AMDGPU_FENCE_OWNER_KFD, false); > if (ret) > return ret; >@@ -887,7 +887,7 @@ > AMDGPU_FENCE_OWNER_KFD, false); > if (ret) > goto wait_pd_fail; >- ret = 
reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); >+ ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.base.resv, 1); > if (ret) > goto reserve_shared_fail; > amdgpu_bo_fence(vm->root.base.bo, >@@ -1090,7 +1090,7 @@ > */ > if (flags & ALLOC_MEM_FLAGS_VRAM) { > domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; >- alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; >+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; > alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? > AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : > AMDGPU_GEM_CREATE_NO_CPU_ACCESS; >@@ -2133,7 +2133,7 @@ > * Add process eviction fence to bo so they can > * evict each other. > */ >- ret = reservation_object_reserve_shared(gws_bo->tbo.resv, 1); >+ ret = reservation_object_reserve_shared(gws_bo->tbo.base.resv, 1); > if (ret) > goto reserve_shared_fail; > amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 2019-08-31 15:01:11.839736167 -0500 >@@ -140,6 +140,7 @@ > struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); > struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); > struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); >+struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void); > struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void); > > bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c 2019-08-31 15:01:11.840736167 -0500 >@@ -574,6 +574,7 @@ > { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, > { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, > { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, >+ { 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX }, > { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, > { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX }, > { 0, 0, 0, 0, 0 }, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c 2019-08-31 15:01:11.840736167 -0500 >@@ -1505,6 +1505,7 @@ > struct amdgpu_connector_atom_dig *amdgpu_dig_connector; > struct drm_encoder *encoder; > struct amdgpu_encoder *amdgpu_encoder; >+ struct i2c_adapter *ddc = NULL; > uint32_t subpixel_order = SubPixelNone; > bool shared_ddc = false; > bool is_dp_bridge = false; >@@ -1574,17 +1575,21 @@ > amdgpu_connector->con_priv = amdgpu_dig_connector; > if (i2c_bus->valid) { > amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); >- if (amdgpu_connector->ddc_bus) >+ if (amdgpu_connector->ddc_bus) { > has_aux = true; >- else >+ ddc = &amdgpu_connector->ddc_bus->adapter; >+ } else { > DRM_ERROR("DP: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); >+ } > } > switch (connector_type) { > case DRM_MODE_CONNECTOR_VGA: > case DRM_MODE_CONNECTOR_DVIA: > default: >- drm_connector_init(dev, &amdgpu_connector->base, >- &amdgpu_connector_dp_funcs, connector_type); >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_dp_funcs, >+ connector_type, >+ ddc); > drm_connector_helper_add(&amdgpu_connector->base, > &amdgpu_connector_dp_helper_funcs); > connector->interlace_allowed = true; >@@ -1602,8 +1607,10 @@ > case DRM_MODE_CONNECTOR_HDMIA: > case DRM_MODE_CONNECTOR_HDMIB: > case DRM_MODE_CONNECTOR_DisplayPort: >- drm_connector_init(dev, &amdgpu_connector->base, >- &amdgpu_connector_dp_funcs, connector_type); >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_dp_funcs, >+ connector_type, >+ ddc); > drm_connector_helper_add(&amdgpu_connector->base, > &amdgpu_connector_dp_helper_funcs); > drm_object_attach_property(&amdgpu_connector->base.base, >@@ -1644,8 +1651,10 @@ > break; > case DRM_MODE_CONNECTOR_LVDS: > case DRM_MODE_CONNECTOR_eDP: >- drm_connector_init(dev, &amdgpu_connector->base, >- &amdgpu_connector_edp_funcs, connector_type); >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_edp_funcs, >+ connector_type, >+ ddc); > drm_connector_helper_add(&amdgpu_connector->base, > &amdgpu_connector_dp_helper_funcs); > drm_object_attach_property(&amdgpu_connector->base.base, >@@ -1659,13 +1668,18 @@ > } else { > switch (connector_type) { > case DRM_MODE_CONNECTOR_VGA: >- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); >- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); > if (i2c_bus->valid) { > amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); > if (!amdgpu_connector->ddc_bus) > DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); >+ else >+ ddc = &amdgpu_connector->ddc_bus->adapter; > } >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_vga_funcs, >+ connector_type, >+ ddc); >+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); > amdgpu_connector->dac_load_detect = true; > drm_object_attach_property(&amdgpu_connector->base.base, > adev->mode_info.load_detect_property, >@@ -1679,13 +1693,18 @@ > connector->doublescan_allowed = true; > break; > case DRM_MODE_CONNECTOR_DVIA: >- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); >- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); > if (i2c_bus->valid) { > amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); > if (!amdgpu_connector->ddc_bus) > DRM_ERROR("DVIA: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); >+ else >+ ddc = &amdgpu_connector->ddc_bus->adapter; > } >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_vga_funcs, >+ connector_type, >+ ddc); >+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); > amdgpu_connector->dac_load_detect = true; > drm_object_attach_property(&amdgpu_connector->base.base, > adev->mode_info.load_detect_property, >@@ -1704,13 +1723,18 @@ > if (!amdgpu_dig_connector) > goto failed; > amdgpu_connector->con_priv = amdgpu_dig_connector; >- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); >- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); > if (i2c_bus->valid) { > amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); > if (!amdgpu_connector->ddc_bus) > DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); >+ else >+ ddc = &amdgpu_connector->ddc_bus->adapter; > } >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_dvi_funcs, >+ connector_type, >+ ddc); >+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); > subpixel_order = SubPixelHorizontalRGB; > drm_object_attach_property(&amdgpu_connector->base.base, > adev->mode_info.coherent_mode_property, >@@ -1754,13 +1778,18 @@ > if (!amdgpu_dig_connector) > goto failed; > amdgpu_connector->con_priv = amdgpu_dig_connector; >- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); >- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); > if (i2c_bus->valid) { > amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); > if (!amdgpu_connector->ddc_bus) > DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); >+ else >+ ddc = &amdgpu_connector->ddc_bus->adapter; > } >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_dvi_funcs, >+ connector_type, >+ ddc); >+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); > drm_object_attach_property(&amdgpu_connector->base.base, > adev->mode_info.coherent_mode_property, > 1); >@@ -1796,15 +1825,20 @@ > if (!amdgpu_dig_connector) > goto failed; > amdgpu_connector->con_priv = amdgpu_dig_connector; >- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dp_funcs, connector_type); >- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); > if (i2c_bus->valid) { > amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); >- if (amdgpu_connector->ddc_bus) >+ if (amdgpu_connector->ddc_bus) { > has_aux = true; >- else >+ ddc = &amdgpu_connector->ddc_bus->adapter; >+ } else { > DRM_ERROR("DP: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); >+ } > } >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_dp_funcs, >+ connector_type, >+ ddc); >+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); > subpixel_order = SubPixelHorizontalRGB; > drm_object_attach_property(&amdgpu_connector->base.base, > adev->mode_info.coherent_mode_property, >@@ -1838,15 +1872,20 @@ > if (!amdgpu_dig_connector) > goto failed; > amdgpu_connector->con_priv = amdgpu_dig_connector; >- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_edp_funcs, connector_type); >- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); > if (i2c_bus->valid) { > amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); >- if (amdgpu_connector->ddc_bus) >+ if (amdgpu_connector->ddc_bus) { > has_aux = true; >- else >+ ddc = &amdgpu_connector->ddc_bus->adapter; >+ } else { > DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); >+ } > } >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_edp_funcs, >+ connector_type, >+ ddc); >+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); > drm_object_attach_property(&amdgpu_connector->base.base, > dev->mode_config.scaling_mode_property, > DRM_MODE_SCALE_FULLSCREEN); >@@ -1859,13 +1898,18 @@ > if (!amdgpu_dig_connector) > goto failed; > amdgpu_connector->con_priv = amdgpu_dig_connector; >- drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_lvds_funcs, connector_type); >- drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs); > if (i2c_bus->valid) { > amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); > if (!amdgpu_connector->ddc_bus) > DRM_ERROR("LVDS: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); >+ else >+ ddc = &amdgpu_connector->ddc_bus->adapter; > } >+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base, >+ &amdgpu_connector_lvds_funcs, >+ connector_type, >+ ddc); >+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs); > drm_object_attach_property(&amdgpu_connector->base.base, > dev->mode_config.scaling_mode_property, > DRM_MODE_SCALE_FULLSCREEN); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 2019-08-31 15:01:12.278736205 -0500 >@@ -402,7 +402,7 @@ > struct ttm_operation_ctx ctx = { > .interruptible = true, > .no_wait_gpu = false, >- .resv = bo->tbo.resv, >+ .resv = bo->tbo.base.resv, > .flags = 0 > }; > uint32_t domain; >@@ -730,7 +730,7 @@ > > list_for_each_entry(e, &p->validated, tv.head) { > struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); >- struct reservation_object *resv = bo->tbo.resv; >+ struct reservation_object *resv = bo->tbo.base.resv; > > r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, > amdgpu_bo_explicit_sync(bo)); >@@ -1732,7 +1732,7 @@ > *map = mapping; > > /* Double check that the BO is reserved by this CS */ >- if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) >+ if (reservation_object_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket) > return -EINVAL; > > if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 2019-08-31 15:01:11.840736167 -0500 >@@ -42,7 +42,7 @@ > [AMDGPU_HW_IP_VCN_JPEG] = 1, > }; > >-static int amdgput_ctx_total_num_entities(void) >+static int amdgpu_ctx_total_num_entities(void) > { > unsigned i, num_entities = 0; > >@@ -73,8 +73,8 @@ > struct drm_file *filp, > struct amdgpu_ctx *ctx) > { >- unsigned num_entities = amdgput_ctx_total_num_entities(); >- unsigned i, j; >+ unsigned num_entities = amdgpu_ctx_total_num_entities(); >+ unsigned i, j, k; > int r; > > if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) >@@ -123,7 +123,7 @@ > for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { > struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; > struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; >- unsigned num_rings; >+ unsigned num_rings = 0; > unsigned num_rqs = 0; > > switch (i) { >@@ -154,16 +154,26 @@ > num_rings = 1; > break; > case AMDGPU_HW_IP_VCN_DEC: >- rings[0] = &adev->vcn.ring_dec; >- num_rings = 1; >+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { >+ if (adev->vcn.harvest_config & (1 << j)) >+ continue; >+ rings[num_rings++] = &adev->vcn.inst[j].ring_dec; >+ } > break; > case AMDGPU_HW_IP_VCN_ENC: >- rings[0] = &adev->vcn.ring_enc[0]; >- num_rings = 1; >+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { >+ if (adev->vcn.harvest_config & (1 << j)) >+ continue; >+ for (k = 0; k < adev->vcn.num_enc_rings; ++k) >+ rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k]; >+ } > break; > case AMDGPU_HW_IP_VCN_JPEG: >- rings[0] = &adev->vcn.ring_jpeg; >- num_rings = 1; >+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { >+ if (adev->vcn.harvest_config & (1 << j)) >+ continue; >+ rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg; >+ } > break; 
> } > >@@ -197,7 +207,7 @@ > static void amdgpu_ctx_fini(struct kref *ref) > { > struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); >- unsigned num_entities = amdgput_ctx_total_num_entities(); >+ unsigned num_entities = amdgpu_ctx_total_num_entities(); > struct amdgpu_device *adev = ctx->adev; > unsigned i, j; > >@@ -279,10 +289,7 @@ > > ctx = container_of(ref, struct amdgpu_ctx, refcount); > >- num_entities = 0; >- for (i = 0; i < AMDGPU_HW_IP_NUM; i++) >- num_entities += amdgpu_ctx_num_entities[i]; >- >+ num_entities = amdgpu_ctx_total_num_entities(); > for (i = 0; i < num_entities; i++) > drm_sched_entity_destroy(&ctx->entities[0][i].entity); > >@@ -344,7 +351,7 @@ > { > struct amdgpu_ctx *ctx; > struct amdgpu_ctx_mgr *mgr; >- uint32_t ras_counter; >+ unsigned long ras_counter; > > if (!fpriv) > return -EINVAL; >@@ -514,7 +521,7 @@ > void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, > enum drm_sched_priority priority) > { >- unsigned num_entities = amdgput_ctx_total_num_entities(); >+ unsigned num_entities = amdgpu_ctx_total_num_entities(); > enum drm_sched_priority ctx_prio; > unsigned i; > >@@ -534,21 +541,24 @@ > struct drm_sched_entity *entity) > { > struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); >- unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1); >- struct dma_fence *other = centity->fences[idx]; >+ struct dma_fence *other; >+ unsigned idx; >+ long r; > >- if (other) { >- signed long r; >- r = dma_fence_wait(other, true); >- if (r < 0) { >- if (r != -ERESTARTSYS) >- DRM_ERROR("Error (%ld) waiting for fence!\n", r); >+ spin_lock(&ctx->ring_lock); >+ idx = centity->sequence & (amdgpu_sched_jobs - 1); >+ other = dma_fence_get(centity->fences[idx]); >+ spin_unlock(&ctx->ring_lock); > >- return r; >- } >- } >+ if (!other) >+ return 0; > >- return 0; >+ r = dma_fence_wait(other, true); >+ if (r < 0 && r != -ERESTARTSYS) >+ DRM_ERROR("Error (%ld) waiting for fence!\n", r); >+ >+ dma_fence_put(other); >+ return r; > } > > void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) >@@ -559,7 +569,7 @@ > > long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) > { >- unsigned num_entities = amdgput_ctx_total_num_entities(); >+ unsigned num_entities = amdgpu_ctx_total_num_entities(); > struct amdgpu_ctx *ctx; > struct idr *idp; > uint32_t id, i; >@@ -581,7 +591,7 @@ > > void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) > { >- unsigned num_entities = amdgput_ctx_total_num_entities(); >+ unsigned num_entities = amdgpu_ctx_total_num_entities(); > struct amdgpu_ctx *ctx; > struct idr *idp; > uint32_t id, i; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 2019-08-31 15:01:11.840736167 -0500 >@@ -49,8 +49,8 @@ > enum drm_sched_priority override_priority; > struct mutex lock; > atomic_t guilty; >- uint32_t ras_counter_ce; >- uint32_t ras_counter_ue; >+ unsigned long ras_counter_ce; >+ unsigned long ras_counter_ue; > }; > > struct amdgpu_ctx_mgr { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 2019-08-31 15:01:11.841736167 -0500 
>@@ -70,7 +70,11 @@
> MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
> MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
>+MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
>+MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
> MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
>+MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
>+MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
>
> #define AMDGPU_RESUME_MS 2000
>
>@@ -98,7 +102,11 @@
> "VEGA12",
> "VEGA20",
> "RAVEN",
>+ "ARCTURUS",
>+ "RENOIR",
> "NAVI10",
>+ "NAVI14",
>+ "NAVI12",
> "LAST",
> };
>
>@@ -413,6 +421,40 @@
> }
>
> /**
>+ * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
>+ *
>+ * @adev: amdgpu device pointer
>+ * @reg: offset of register
>+ *
>+ * Dummy register read function. Used for register blocks
>+ * that certain asics don't have (all asics).
>+ * Returns the value in the register.
>+ */
>+static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
>+{
>+ DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
>+ BUG();
>+ return 0;
>+}
>+
>+/**
>+ * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
>+ *
>+ * @adev: amdgpu device pointer
>+ * @reg: offset of register
>+ * @v: value to write to the register
>+ *
>+ * Dummy register write function. Used for register blocks
>+ * that certain asics don't have (all asics).
>+ */
>+static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
>+{
>+ DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
>+ reg, v);
>+ BUG();
>+}
>+
>+/**
> * amdgpu_block_invalid_rreg - dummy reg read function
> *
> * @adev: amdgpu device pointer
>@@ -1384,9 +1426,21 @@
> else
> chip_name = "raven";
> break;
>+ case CHIP_ARCTURUS:
>+ chip_name = "arcturus";
>+ break;
>+ case CHIP_RENOIR:
>+ chip_name = "renoir";
>+ break;
> case CHIP_NAVI10:
> chip_name = "navi10";
> break;
>+ case CHIP_NAVI14:
>+ chip_name = "navi14";
>+ break;
>+ case CHIP_NAVI12:
>+ chip_name = "navi12";
>+ break;
> }
>
> snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
>@@ -1529,7 +1583,10 @@
> case CHIP_VEGA12:
> case CHIP_VEGA20:
> case CHIP_RAVEN:
>- if (adev->asic_type == CHIP_RAVEN)
>+ case CHIP_ARCTURUS:
>+ case CHIP_RENOIR:
>+ if (adev->asic_type == CHIP_RAVEN ||
>+ adev->asic_type == CHIP_RENOIR)
> adev->family = AMDGPU_FAMILY_RV;
> else
> adev->family = AMDGPU_FAMILY_AI;
>@@ -1539,6 +1596,8 @@
> return r;
> break;
> case CHIP_NAVI10:
>+ case CHIP_NAVI14:
>+ case CHIP_NAVI12:
> adev->family = AMDGPU_FAMILY_NV;
>
> r = nv_set_ip_blocks(adev);
>@@ -1560,9 +1619,6 @@
> r = amdgpu_virt_request_full_gpu(adev, true);
> if (r)
> return -EAGAIN;
>-
>- /* query the reg access mode at the very beginning */
>- amdgpu_virt_init_reg_access_mode(adev);
> }
>
> adev->pm.pp_feature = amdgpu_pp_feature_mask;
>@@ -1665,28 +1721,34 @@
>
> if (adev->asic_type >= CHIP_VEGA10) {
> for (i = 0; i < adev->num_ip_blocks; i++) {
>- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
>- if (adev->in_gpu_reset || adev->in_suspend) {
>- if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
>- break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
>- r = adev->ip_blocks[i].version->funcs->resume(adev);
>- if (r) {
>- DRM_ERROR("resume of IP block <%s> failed %d\n",
>+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
>+ continue;
>+
>+ /* no need to do the fw loading again if already done */
>+ if (adev->ip_blocks[i].status.hw == true)
>+ break;
>+
>+ if 
(adev->in_gpu_reset || adev->in_suspend) { >+ r = adev->ip_blocks[i].version->funcs->resume(adev); >+ if (r) { >+ DRM_ERROR("resume of IP block <%s> failed %d\n", > adev->ip_blocks[i].version->funcs->name, r); >- return r; >- } >- } else { >- r = adev->ip_blocks[i].version->funcs->hw_init(adev); >- if (r) { >- DRM_ERROR("hw_init of IP block <%s> failed %d\n", >- adev->ip_blocks[i].version->funcs->name, r); >- return r; >- } >+ return r; >+ } >+ } else { >+ r = adev->ip_blocks[i].version->funcs->hw_init(adev); >+ if (r) { >+ DRM_ERROR("hw_init of IP block <%s> failed %d\n", >+ adev->ip_blocks[i].version->funcs->name, r); >+ return r; > } >- adev->ip_blocks[i].status.hw = true; > } >+ >+ adev->ip_blocks[i].status.hw = true; >+ break; > } > } >+ > r = amdgpu_pm_load_smu_firmware(adev, &smu_version); > > return r; >@@ -2128,7 +2190,9 @@ > if (r) { > DRM_ERROR("suspend of IP block <%s> failed %d\n", > adev->ip_blocks[i].version->funcs->name, r); >+ return r; > } >+ adev->ip_blocks[i].status.hw = false; > } > } > >@@ -2163,6 +2227,25 @@ > DRM_ERROR("suspend of IP block <%s> failed %d\n", > adev->ip_blocks[i].version->funcs->name, r); > } >+ adev->ip_blocks[i].status.hw = false; >+ /* handle putting the SMC in the appropriate state */ >+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { >+ if (is_support_sw_smu(adev)) { >+ /* todo */ >+ } else if (adev->powerplay.pp_funcs && >+ adev->powerplay.pp_funcs->set_mp1_state) { >+ r = adev->powerplay.pp_funcs->set_mp1_state( >+ adev->powerplay.pp_handle, >+ adev->mp1_state); >+ if (r) { >+ DRM_ERROR("SMC failed to set mp1 state %d, %d\n", >+ adev->mp1_state, r); >+ return r; >+ } >+ } >+ } >+ >+ adev->ip_blocks[i].status.hw = false; > } > > return 0; >@@ -2215,6 +2298,7 @@ > for (j = 0; j < adev->num_ip_blocks; j++) { > block = &adev->ip_blocks[j]; > >+ block->status.hw = false; > if (block->version->type != ip_order[i] || > !block->status.valid) > continue; >@@ -2223,6 +2307,7 @@ > DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); > if (r) > return r; >+ block->status.hw = true; > } > } > >@@ -2250,13 +2335,15 @@ > block = &adev->ip_blocks[j]; > > if (block->version->type != ip_order[i] || >- !block->status.valid) >+ !block->status.valid || >+ block->status.hw) > continue; > > r = block->version->funcs->hw_init(adev); > DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); > if (r) > return r; >+ block->status.hw = true; > } > } > >@@ -2280,17 +2367,19 @@ > int i, r; > > for (i = 0; i < adev->num_ip_blocks; i++) { >- if (!adev->ip_blocks[i].status.valid) >+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) > continue; > if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || > adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || > adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { >+ > r = adev->ip_blocks[i].version->funcs->resume(adev); > if (r) { > DRM_ERROR("resume of IP block <%s> failed %d\n", > adev->ip_blocks[i].version->funcs->name, r); > return r; > } >+ adev->ip_blocks[i].status.hw = true; > } > } > >@@ -2315,7 +2404,7 @@ > int i, r; > > for (i = 0; i < adev->num_ip_blocks; i++) { >- if (!adev->ip_blocks[i].status.valid) >+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) > continue; > if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || > adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || >@@ -2328,6 +2417,7 @@ > adev->ip_blocks[i].version->funcs->name, r); > 
return r; > } >+ adev->ip_blocks[i].status.hw = true; > } > > return 0; >@@ -2426,6 +2516,11 @@ > #endif > #if defined(CONFIG_DRM_AMD_DC_DCN2_0) > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: >+#endif >+#if defined(CONFIG_DRM_AMD_DC_DCN2_1) >+ case CHIP_RENOIR: > #endif > return amdgpu_dc != 0; > #endif >@@ -2509,6 +2604,8 @@ > adev->pcie_wreg = &amdgpu_invalid_wreg; > adev->pciep_rreg = &amdgpu_invalid_rreg; > adev->pciep_wreg = &amdgpu_invalid_wreg; >+ adev->pcie_rreg64 = &amdgpu_invalid_rreg64; >+ adev->pcie_wreg64 = &amdgpu_invalid_wreg64; > adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; > adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; > adev->didt_rreg = &amdgpu_invalid_rreg; >@@ -3389,7 +3486,7 @@ > amdgpu_virt_init_data_exchange(adev); > amdgpu_virt_release_full_gpu(adev, true); > if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { >- atomic_inc(&adev->vram_lost_counter); >+ amdgpu_inc_vram_lost(adev); > r = amdgpu_device_recover_vram(adev); > } > >@@ -3431,6 +3528,7 @@ > case CHIP_VEGA20: > case CHIP_VEGA10: > case CHIP_VEGA12: >+ case CHIP_RAVEN: > break; > default: > goto disabled; >@@ -3554,7 +3652,7 @@ > vram_lost = amdgpu_device_check_vram_lost(tmp_adev); > if (vram_lost) { > DRM_INFO("VRAM is lost due to GPU reset!\n"); >- atomic_inc(&tmp_adev->vram_lost_counter); >+ amdgpu_inc_vram_lost(tmp_adev); > } > > r = amdgpu_gtt_mgr_recover( >@@ -3627,6 +3725,17 @@ > > atomic_inc(&adev->gpu_reset_counter); > adev->in_gpu_reset = 1; >+ switch (amdgpu_asic_reset_method(adev)) { >+ case AMD_RESET_METHOD_MODE1: >+ adev->mp1_state = PP_MP1_STATE_SHUTDOWN; >+ break; >+ case AMD_RESET_METHOD_MODE2: >+ adev->mp1_state = PP_MP1_STATE_RESET; >+ break; >+ default: >+ adev->mp1_state = PP_MP1_STATE_NONE; >+ break; >+ } > /* Block kfd: SRIOV would do it separately */ > if (!amdgpu_sriov_vf(adev)) > amdgpu_amdkfd_pre_reset(adev); >@@ -3640,6 +3749,7 @@ > if (!amdgpu_sriov_vf(adev)) > amdgpu_amdkfd_post_reset(adev); > amdgpu_vf_error_trans_all(adev); >+ adev->mp1_state = PP_MP1_STATE_NONE; > adev->in_gpu_reset = 0; > mutex_unlock(&adev->lock_reset); > } >@@ -3684,14 +3794,14 @@ > > if (hive && !mutex_trylock(&hive->reset_lock)) { > DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", >- job->base.id, hive->hive_id); >+ job ? job->base.id : -1, hive->hive_id); > return 0; > } > > /* Start with adev pre asic reset first for soft reset check.*/ > if (!amdgpu_device_lock_adev(adev, !hive)) { > DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", >- job->base.id); >+ job ? job->base.id : -1); > return 0; > } > >@@ -3732,7 +3842,7 @@ > if (!ring || !ring->sched.thread) > continue; > >- drm_sched_stop(&ring->sched, &job->base); >+ drm_sched_stop(&ring->sched, job ? 
&job->base : NULL);
> }
> }
>
>@@ -3757,9 +3867,7 @@
>
>
> /* Guilty job will be freed after this*/
>- r = amdgpu_device_pre_asic_reset(adev,
>- job,
>- &need_full_reset);
>+ r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
> if (r) {
> /*TODO Should we stop ?*/
> DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 2019-08-31 15:01:11.841736167 -0500
>@@ -191,7 +191,8 @@
> }
>
> if (!adev->enable_virtual_display) {
>- r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
>+ r = amdgpu_bo_pin(new_abo,
>+ amdgpu_display_supported_domains(adev, new_abo->flags));
> if (unlikely(r != 0)) {
> DRM_ERROR("failed to pin new abo buffer before flip\n");
> goto unreserve;
>@@ -204,7 +205,7 @@
> goto unpin;
> }
>
>- r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
>+ r = reservation_object_get_fences_rcu(new_abo->tbo.base.resv, &work->excl,
> &work->shared_count,
> &work->shared);
> if (unlikely(r != 0)) {
>@@ -495,13 +496,25 @@
> .create_handle = drm_gem_fb_create_handle,
> };
>
>-uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
>+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
>+ uint64_t bo_flags)
> {
> uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
>
> #if defined(CONFIG_DRM_AMD_DC)
>- if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN &&
>- adev->flags & AMD_IS_APU &&
>+ /*
>+ * If amdgpu_bo_support_uswc returns false it means that USWC mappings
>+ * are not supported for this board. But this mapping is required
>+ * to avoid a hang caused by placement of a scanout BO in GTT on certain
>+ * APUs. So force the BO placement to VRAM in case this architecture
>+ * will not allow USWC mappings.
>+ * Also, don't allow GTT domain if the BO doesn't have the USWC flag set.
>+ */ >+ if (adev->asic_type >= CHIP_CARRIZO && >+ adev->asic_type <= CHIP_RAVEN && >+ (adev->flags & AMD_IS_APU) && >+ (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && >+ amdgpu_bo_support_uswc(bo_flags) && > amdgpu_device_asic_has_dc_support(adev->asic_type)) > domain |= AMDGPU_GEM_DOMAIN_GTT; > #endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h 2019-08-31 15:01:11.841736167 -0500 >@@ -38,7 +38,8 @@ > int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp); > void amdgpu_display_update_priority(struct amdgpu_device *adev); >-uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); >+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, >+ uint64_t bo_flags); > struct drm_framebuffer * > amdgpu_display_user_framebuffer_create(struct drm_device *dev, > struct drm_file *file_priv, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 2019-08-31 15:01:11.841736167 -0500 >@@ -216,7 +216,7 @@ > * fences on the reservation object into a single exclusive > * fence. > */ >- r = __reservation_object_make_exclusive(bo->tbo.resv); >+ r = __reservation_object_make_exclusive(bo->tbo.base.resv); > if (r) > goto error_unreserve; > } >@@ -268,20 +268,6 @@ > } > > /** >- * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation >- * @obj: GEM BO >- * >- * Returns: >- * The BO's reservation object. >- */ >-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) >-{ >- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); >- >- return bo->tbo.resv; >-} >- >-/** > * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation > * @dma_buf: Shared DMA buffer > * @direction: Direction of DMA transfer >@@ -299,7 +285,7 @@ > struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); > struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > struct ttm_operation_ctx ctx = { true, false }; >- u32 domain = amdgpu_display_supported_domains(adev); >+ u32 domain = amdgpu_display_supported_domains(adev, bo->flags); > int ret; > bool reads = (direction == DMA_BIDIRECTIONAL || > direction == DMA_FROM_DEVICE); >@@ -339,14 +325,12 @@ > * @gobj: GEM BO > * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. > * >- * The main work is done by the &drm_gem_prime_export helper, which in turn >- * uses &amdgpu_gem_prime_res_obj. >+ * The main work is done by the &drm_gem_prime_export helper. > * > * Returns: > * Shared DMA buffer representing the GEM BO from the given device. 
> */
>-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>- struct drm_gem_object *gobj,
>+struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
> int flags)
> {
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>@@ -356,9 +340,9 @@
> bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
> return ERR_PTR(-EPERM);
>
>- buf = drm_gem_prime_export(dev, gobj, flags);
>+ buf = drm_gem_prime_export(gobj, flags);
> if (!IS_ERR(buf)) {
>- buf->file->f_mapping = dev->anon_inode->i_mapping;
>+ buf->file->f_mapping = gobj->dev->anon_inode->i_mapping;
> buf->ops = &amdgpu_dmabuf_ops;
> }
>
>@@ -396,7 +380,7 @@
> bp.flags = 0;
> bp.type = ttm_bo_type_sg;
> bp.resv = resv;
>- ww_mutex_lock(&resv->lock, NULL);
>+ reservation_object_lock(resv, NULL);
> ret = amdgpu_bo_create(adev, &bp, &bo);
> if (ret)
> goto error;
>@@ -408,11 +392,11 @@
> if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
> bo->prime_shared_count = 1;
>
>- ww_mutex_unlock(&resv->lock);
>- return &bo->gem_base;
>+ reservation_object_unlock(resv);
>+ return &bo->tbo.base;
>
> error:
>- ww_mutex_unlock(&resv->lock);
>+ reservation_object_unlock(resv);
> return ERR_PTR(ret);
> }
>
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h 2019-08-31 15:01:11.841736167 -0500
>@@ -30,12 +30,10 @@
> amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> struct dma_buf_attachment *attach,
> struct sg_table *sg);
>-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>- struct drm_gem_object *gobj,
>+struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
> int flags);
> struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> struct dma_buf *dma_buf);
>-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
> void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
> void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
> int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 2019-08-31 15:01:11.841736167 -0500
>@@ -130,13 +130,18 @@
> AMDGPU_VEGA20_DOORBELL_IH = 0x178,
> /* MMSCH: 392~407
> * overlap the doorbell assignment with VCN as they are mutually exclusive
>- * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
>+ * VCN engine's doorbell is 32 bit and two VCN rings share one QWORD
> */
>- AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
>+ AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VCN0 */
> AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
> AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
> AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,
>
>+ AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VCN1 */
>+ AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D,
>+ AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E,
>+ AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F,
>+
> AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188,
> AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189,
> AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A,
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 2019-08-31 15:01:11.841736167 -0500 >@@ -79,9 +79,10 @@ > * - 3.31.0 - Add support for per-flip tiling attribute changes with DC > * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. > * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. >+ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches > */ > #define KMS_DRIVER_MAJOR 3 >-#define KMS_DRIVER_MINOR 33 >+#define KMS_DRIVER_MINOR 34 > #define KMS_DRIVER_PATCHLEVEL 0 > > #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 >@@ -142,7 +143,7 @@ > int amdgpu_mcbp = 0; > int amdgpu_discovery = -1; > int amdgpu_mes = 0; >-int amdgpu_noretry; >+int amdgpu_noretry = 1; > > struct amdgpu_mgpu_info mgpu_info = { > .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), >@@ -610,7 +611,7 @@ > module_param_named(mes, amdgpu_mes, int, 0444); > > MODULE_PARM_DESC(noretry, >- "Disable retry faults (0 = retry enabled (default), 1 = retry disabled)"); >+ "Disable retry faults (0 = retry enabled, 1 = retry disabled (default))"); > module_param_named(noretry, amdgpu_noretry, int, 0644); > > #ifdef CONFIG_HSA_AMD >@@ -996,6 +997,11 @@ > /* Raven */ > {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, > {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, >+ /* Arcturus */ >+ {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, >+ {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, >+ {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, >+ {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, > /* Navi10 */ > {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, > {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, >@@ -1004,6 +1010,11 @@ > {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, > {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, > {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, >+ /* Navi14 */ >+ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, >+ >+ /* Renoir */ >+ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, > > {0, 0, 0} > }; >@@ -1092,21 +1103,21 @@ > * unfortunately we can't detect certain > * hypervisors so just do this all the time. 
> */ >+ adev->mp1_state = PP_MP1_STATE_UNLOAD; > amdgpu_device_ip_suspend(adev); >+ adev->mp1_state = PP_MP1_STATE_NONE; > } > > static int amdgpu_pmops_suspend(struct device *dev) > { >- struct pci_dev *pdev = to_pci_dev(dev); >+ struct drm_device *drm_dev = dev_get_drvdata(dev); > >- struct drm_device *drm_dev = pci_get_drvdata(pdev); > return amdgpu_device_suspend(drm_dev, true, true); > } > > static int amdgpu_pmops_resume(struct device *dev) > { >- struct pci_dev *pdev = to_pci_dev(dev); >- struct drm_device *drm_dev = pci_get_drvdata(pdev); >+ struct drm_device *drm_dev = dev_get_drvdata(dev); > > /* GPU comes up enabled by the bios on resume */ > if (amdgpu_device_is_px(drm_dev)) { >@@ -1120,33 +1131,29 @@ > > static int amdgpu_pmops_freeze(struct device *dev) > { >- struct pci_dev *pdev = to_pci_dev(dev); >+ struct drm_device *drm_dev = dev_get_drvdata(dev); > >- struct drm_device *drm_dev = pci_get_drvdata(pdev); > return amdgpu_device_suspend(drm_dev, false, true); > } > > static int amdgpu_pmops_thaw(struct device *dev) > { >- struct pci_dev *pdev = to_pci_dev(dev); >+ struct drm_device *drm_dev = dev_get_drvdata(dev); > >- struct drm_device *drm_dev = pci_get_drvdata(pdev); > return amdgpu_device_resume(drm_dev, false, true); > } > > static int amdgpu_pmops_poweroff(struct device *dev) > { >- struct pci_dev *pdev = to_pci_dev(dev); >+ struct drm_device *drm_dev = dev_get_drvdata(dev); > >- struct drm_device *drm_dev = pci_get_drvdata(pdev); > return amdgpu_device_suspend(drm_dev, true, true); > } > > static int amdgpu_pmops_restore(struct device *dev) > { >- struct pci_dev *pdev = to_pci_dev(dev); >+ struct drm_device *drm_dev = dev_get_drvdata(dev); > >- struct drm_device *drm_dev = pci_get_drvdata(pdev); > return amdgpu_device_resume(drm_dev, false, true); > } > >@@ -1205,8 +1212,7 @@ > > static int amdgpu_pmops_runtime_idle(struct device *dev) > { >- struct pci_dev *pdev = to_pci_dev(dev); >- struct drm_device *drm_dev = pci_get_drvdata(pdev); >+ struct drm_device *drm_dev = dev_get_drvdata(dev); > struct drm_crtc *crtc; > > if (!amdgpu_device_is_px(drm_dev)) { >@@ -1373,7 +1379,7 @@ > .driver_features = > DRIVER_USE_AGP | DRIVER_ATOMIC | > DRIVER_GEM | >- DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, >+ DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, > .load = amdgpu_driver_load_kms, > .open = amdgpu_driver_open_kms, > .postclose = amdgpu_driver_postclose_kms, >@@ -1397,7 +1403,6 @@ > .prime_fd_to_handle = drm_gem_prime_fd_to_handle, > .gem_prime_export = amdgpu_gem_prime_export, > .gem_prime_import = amdgpu_gem_prime_import, >- .gem_prime_res_obj = amdgpu_gem_prime_res_obj, > .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table, > .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, > .gem_prime_vmap = amdgpu_gem_prime_vmap, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 2019-08-31 15:01:11.841736167 -0500 >@@ -131,6 +131,10 @@ > int aligned_size, size; > int height = mode_cmd->height; > u32 cpp; >+ u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | >+ AMDGPU_GEM_CREATE_VRAM_CLEARED | >+ AMDGPU_GEM_CREATE_CPU_GTT_USWC; > > info = drm_get_format_info(adev->ddev, mode_cmd); > cpp = info->cpp[0]; >@@ -138,15 +142,11 @@ > /* need to align pitch with crtc limits */ > 
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, > fb_tiled); >- domain = amdgpu_display_supported_domains(adev); >- >+ domain = amdgpu_display_supported_domains(adev, flags); > height = ALIGN(mode_cmd->height, 8); > size = mode_cmd->pitches[0] * height; > aligned_size = ALIGN(size, PAGE_SIZE); >- ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, >- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | >- AMDGPU_GEM_CREATE_VRAM_CLEARED, >+ ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, > ttm_bo_type_kernel, NULL, &gobj); > if (ret) { > pr_err("failed to allocate framebuffer (%d)\n", aligned_size); >@@ -168,7 +168,6 @@ > dev_err(adev->dev, "FB failed to set tiling flags\n"); > } > >- > ret = amdgpu_bo_pin(abo, domain); > if (ret) { > amdgpu_bo_unreserve(abo); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 2019-08-31 15:01:11.841736167 -0500 >@@ -251,7 +251,9 @@ > } > mb(); > amdgpu_asic_flush_hdp(adev, NULL); >- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); >+ for (i = 0; i < adev->num_vmhubs; i++) >+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); >+ > return 0; > } > >@@ -310,9 +312,9 @@ > uint64_t flags) > { > #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS >- unsigned i,t,p; >+ unsigned t,p; > #endif >- int r; >+ int r, i; > > if (!adev->gart.ready) { > WARN(1, "trying to bind memory to uninitialized GART !\n"); >@@ -336,7 +338,8 @@ > > mb(); > amdgpu_asic_flush_hdp(adev, NULL); >- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); >+ for (i = 0; i < adev->num_vmhubs; i++) >+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); > return 0; > } > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 2019-08-31 15:01:11.841736167 -0500 >@@ -85,7 +85,7 @@ > } > return r; > } >- *obj = &bo->gem_base; >+ *obj = &bo->tbo.base; > > return 0; > } >@@ -134,7 +134,7 @@ > return -EPERM; > > if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && >- abo->tbo.resv != vm->root.base.bo->tbo.resv) >+ abo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) > return -EPERM; > > r = amdgpu_bo_reserve(abo, false); >@@ -252,7 +252,7 @@ > if (r) > return r; > >- resv = vm->root.base.bo->tbo.resv; >+ resv = vm->root.base.bo->tbo.base.resv; > } > > r = amdgpu_gem_object_create(adev, size, args->in.alignment, >@@ -433,7 +433,7 @@ > return -ENOENT; > } > robj = gem_to_amdgpu_bo(gobj); >- ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, >+ ret = reservation_object_wait_timeout_rcu(robj->tbo.base.resv, true, true, > timeout); > > /* ret == 0 means not signaled, >@@ -689,7 +689,7 @@ > struct drm_amdgpu_gem_create_in info; > void __user *out = u64_to_user_ptr(args->value); > >- info.bo_size = robj->gem_base.size; >+ info.bo_size = robj->tbo.base.size; > info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; > info.domains = robj->preferred_domains; > info.domain_flags = robj->flags; >@@ -747,7 +747,8 @@ > struct amdgpu_device *adev = dev->dev_private; > struct drm_gem_object *gobj; > uint32_t handle; >- u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; >+ u64 flags = 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >+ AMDGPU_GEM_CREATE_CPU_GTT_USWC; > u32 domain; > int r; > >@@ -764,7 +765,7 @@ > args->size = (u64)args->pitch * args->height; > args->size = ALIGN(args->size, PAGE_SIZE); > domain = amdgpu_bo_get_preferred_pin_domain(adev, >- amdgpu_display_supported_domains(adev)); >+ amdgpu_display_supported_domains(adev, flags)); > r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, > ttm_bo_type_device, NULL, &gobj); > if (r) >@@ -819,8 +820,8 @@ > if (pin_count) > seq_printf(m, " pin count %d", pin_count); > >- dma_buf = READ_ONCE(bo->gem_base.dma_buf); >- attachment = READ_ONCE(bo->gem_base.import_attach); >+ dma_buf = READ_ONCE(bo->tbo.base.dma_buf); >+ attachment = READ_ONCE(bo->tbo.base.import_attach); > > if (attachment) > seq_printf(m, " imported from %p", dma_buf); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h 2019-08-31 15:01:11.841736167 -0500 >@@ -31,7 +31,7 @@ > */ > > #define AMDGPU_GEM_DOMAIN_MAX 0x3 >-#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) >+#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) > > void amdgpu_gem_object_free(struct drm_gem_object *obj); > int amdgpu_gem_object_open(struct drm_gem_object *obj, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 2019-08-31 15:01:11.842736167 -0500 >@@ -389,7 +389,7 @@ > dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); > } > >- if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { >+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { > /* create MQD for each KGQ */ > for (i = 0; i < adev->gfx.num_gfx_rings; i++) { > ring = &adev->gfx.gfx_ring[i]; >@@ -437,7 +437,7 @@ > struct amdgpu_ring *ring = NULL; > int i; > >- if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { >+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { > for (i = 0; i < adev->gfx.num_gfx_rings; i++) { > ring = &adev->gfx.gfx_ring[i]; > kfree(adev->gfx.me.mqd_backup[i]); >@@ -456,7 +456,7 @@ > } > > ring = &adev->gfx.kiq.ring; >- if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) >+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) > kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); > kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); > amdgpu_bo_free_kernel(&ring->mqd_obj, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 2019-08-31 15:01:11.842736167 -0500 >@@ -196,6 +196,8 @@ > uint32_t *dst); > void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, > u32 queue, u32 vmid); >+ int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); >+ int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status); > }; > > struct amdgpu_ngg_buf { >diff -Naur 
linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 2019-08-31 15:01:11.842736167 -0500 >@@ -220,6 +220,14 @@ > const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1); > u64 size_af, size_bf; > >+ if (amdgpu_sriov_vf(adev)) { >+ mc->agp_start = 0xffffffff; >+ mc->agp_end = 0x0; >+ mc->agp_size = 0; >+ >+ return; >+ } >+ > if (mc->fb_start > mc->gart_start) { > size_bf = (mc->fb_start & sixteen_gb_mask) - > ALIGN(mc->gart_end + 1, sixteen_gb); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 2019-08-31 15:01:11.842736167 -0500 >@@ -89,8 +89,8 @@ > */ > struct amdgpu_gmc_funcs { > /* flush the vm tlb via mmio */ >- void (*flush_gpu_tlb)(struct amdgpu_device *adev, >- uint32_t vmid, uint32_t flush_type); >+ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, >+ uint32_t vmhub, uint32_t flush_type); > /* flush the vm tlb via ring */ > uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, > uint64_t pd_addr); >@@ -177,10 +177,11 @@ > > struct amdgpu_xgmi xgmi; > struct amdgpu_irq_src ecc_irq; >- struct ras_common_if *ras_if; >+ struct ras_common_if *umc_ras_if; >+ struct ras_common_if *mmhub_ras_if; > }; > >-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) >+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) > #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) > #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) > #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu.h 2019-08-31 15:01:11.839736167 -0500 >@@ -86,6 +86,8 @@ > #include "amdgpu_smu.h" > #include "amdgpu_discovery.h" > #include "amdgpu_mes.h" >+#include "amdgpu_umc.h" >+#include "amdgpu_mmhub.h" > > #define MAX_GPU_INSTANCE 16 > >@@ -532,6 +534,14 @@ > bool grbm_indexed; > }; > >+enum amd_reset_method { >+ AMD_RESET_METHOD_LEGACY = 0, >+ AMD_RESET_METHOD_MODE0, >+ AMD_RESET_METHOD_MODE1, >+ AMD_RESET_METHOD_MODE2, >+ AMD_RESET_METHOD_BACO >+}; >+ > /* > * ASIC specific functions. 
> */ >@@ -543,6 +553,7 @@ > u32 sh_num, u32 reg_offset, u32 *value); > void (*set_vga_state)(struct amdgpu_device *adev, bool state); > int (*reset)(struct amdgpu_device *adev); >+ enum amd_reset_method (*reset_method)(struct amdgpu_device *adev); > /* get the reference clock */ > u32 (*get_xclk)(struct amdgpu_device *adev); > /* MM block clocks */ >@@ -627,6 +638,9 @@ > typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t); > typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); > >+typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t); >+typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); >+ > typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); > typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); > >@@ -648,6 +662,12 @@ > u32 ref_and_mask_cp9; > u32 ref_and_mask_sdma0; > u32 ref_and_mask_sdma1; >+ u32 ref_and_mask_sdma2; >+ u32 ref_and_mask_sdma3; >+ u32 ref_and_mask_sdma4; >+ u32 ref_and_mask_sdma5; >+ u32 ref_and_mask_sdma6; >+ u32 ref_and_mask_sdma7; > }; > > struct amdgpu_mmio_remap { >@@ -668,7 +688,7 @@ > void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, > bool use_doorbell, int doorbell_index, int doorbell_size); > void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, >- int doorbell_index); >+ int doorbell_index, int instance); > void (*enable_doorbell_aperture)(struct amdgpu_device *adev, > bool enable); > void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, >@@ -705,6 +725,9 @@ > int is_disable); > void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, > uint64_t *count); >+ uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); >+ void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, >+ uint32_t ficadl_val, uint32_t ficadh_val); > }; > /* Define the HW IP blocks will be used in driver , add more if necessary */ > enum amd_hw_ip_block_type { >@@ -712,6 +735,12 @@ > HDP_HWIP, > SDMA0_HWIP, > SDMA1_HWIP, >+ SDMA2_HWIP, >+ SDMA3_HWIP, >+ SDMA4_HWIP, >+ SDMA5_HWIP, >+ SDMA6_HWIP, >+ SDMA7_HWIP, > MMHUB_HWIP, > ATHUB_HWIP, > NBIO_HWIP, >@@ -728,10 +757,12 @@ > NBIF_HWIP, > THM_HWIP, > CLK_HWIP, >+ UMC_HWIP, >+ RSMU_HWIP, > MAX_HWIP > }; > >-#define HWIP_MAX_INSTANCE 6 >+#define HWIP_MAX_INSTANCE 8 > > struct amd_powerplay { > void *pp_handle; >@@ -758,7 +789,6 @@ > int usec_timeout; > const struct amdgpu_asic_funcs *asic_funcs; > bool shutdown; >- bool need_dma32; > bool need_swiotlb; > bool accel_working; > struct notifier_block acpi_nb; >@@ -803,6 +833,8 @@ > amdgpu_wreg_t pcie_wreg; > amdgpu_rreg_t pciep_rreg; > amdgpu_wreg_t pciep_wreg; >+ amdgpu_rreg64_t pcie_rreg64; >+ amdgpu_wreg64_t pcie_wreg64; > /* protects concurrent UVD register access */ > spinlock_t uvd_ctx_idx_lock; > amdgpu_rreg_t uvd_ctx_rreg; >@@ -836,6 +868,7 @@ > dma_addr_t dummy_page_addr; > struct amdgpu_vm_manager vm_manager; > struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; >+ unsigned num_vmhubs; > > /* memory management */ > struct amdgpu_mman mman; >@@ -915,6 +948,9 @@ > /* KFD */ > struct amdgpu_kfd_dev kfd; > >+ /* UMC */ >+ struct amdgpu_umc umc; >+ > /* display related functionality */ > struct amdgpu_display_manager dm; > >@@ -940,6 +976,7 @@ > > const struct amdgpu_nbio_funcs *nbio_funcs; > const struct amdgpu_df_funcs *df_funcs; >+ const struct amdgpu_mmhub_funcs *mmhub_funcs; > > /* delayed work_func for deferring clockgating during resume */ > struct delayed_work 
delayed_init_work;
>@@ -965,6 +1002,7 @@
> /* record last mm index being written through WREG32*/
> unsigned long last_mm_index;
> bool in_gpu_reset;
>+ enum pp_mp1_state mp1_state;
> struct mutex lock_reset;
> struct amdgpu_doorbell_index doorbell_index;
>
>@@ -1033,6 +1071,8 @@
> #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
> #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
> #define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
>+#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
>+#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
> #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
> #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
> #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
>@@ -1093,6 +1133,7 @@
> */
> #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
> #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
>+#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
> #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
> #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
> #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
>@@ -1110,6 +1151,7 @@
> #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
> #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
> #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
>+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter))
>
> /* Common functions */
> bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 2019-08-31 15:01:11.842736167 -0500
>@@ -368,7 +368,8 @@
> * are broken on Navi10 and Navi14.
> */
> if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
>- adev->asic_type == CHIP_NAVI10))
>+ adev->asic_type == CHIP_NAVI10 ||
>+ adev->asic_type == CHIP_NAVI14))
> continue;
>
> /* Good, we can use this VMID. 
Remember this submission as >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 2019-08-31 15:01:11.842736167 -0500 >@@ -408,23 +408,38 @@ > break; > case AMDGPU_HW_IP_VCN_DEC: > type = AMD_IP_BLOCK_TYPE_VCN; >- if (adev->vcn.ring_dec.sched.ready) >- ++num_rings; >+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { >+ if (adev->uvd.harvest_config & (1 << i)) >+ continue; >+ >+ if (adev->vcn.inst[i].ring_dec.sched.ready) >+ ++num_rings; >+ } > ib_start_alignment = 16; > ib_size_alignment = 16; > break; > case AMDGPU_HW_IP_VCN_ENC: > type = AMD_IP_BLOCK_TYPE_VCN; >- for (i = 0; i < adev->vcn.num_enc_rings; i++) >- if (adev->vcn.ring_enc[i].sched.ready) >- ++num_rings; >+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { >+ if (adev->uvd.harvest_config & (1 << i)) >+ continue; >+ >+ for (j = 0; j < adev->vcn.num_enc_rings; j++) >+ if (adev->vcn.inst[i].ring_enc[j].sched.ready) >+ ++num_rings; >+ } > ib_start_alignment = 64; > ib_size_alignment = 1; > break; > case AMDGPU_HW_IP_VCN_JPEG: > type = AMD_IP_BLOCK_TYPE_VCN; >- if (adev->vcn.ring_jpeg.sched.ready) >- ++num_rings; >+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { >+ if (adev->uvd.harvest_config & (1 << i)) >+ continue; >+ >+ if (adev->vcn.inst[i].ring_jpeg.sched.ready) >+ ++num_rings; >+ } > ib_start_alignment = 16; > ib_size_alignment = 16; > break; >@@ -1088,7 +1103,7 @@ > amdgpu_vm_fini(adev, &fpriv->vm); > > if (pasid) >- amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); >+ amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid); > amdgpu_bo_unref(&pd); > > idr_for_each_entry(&fpriv->bo_list_handles, list, handle) >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 2019-08-31 15:01:11.842736167 -0500 >@@ -0,0 +1,31 @@ >+/* >+ * Copyright (C) 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included >+ * in all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN >+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
>+ */ >+#ifndef __AMDGPU_MMHUB_H__ >+#define __AMDGPU_MMHUB_H__ >+ >+struct amdgpu_mmhub_funcs { >+ void (*ras_init)(struct amdgpu_device *adev); >+ void (*query_ras_error_count)(struct amdgpu_device *adev, >+ void *ras_error_status); >+}; >+ >+#endif >+ >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 2019-08-31 15:01:11.842736167 -0500 >@@ -179,7 +179,7 @@ > if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) > continue; > >- r = reservation_object_wait_timeout_rcu(bo->tbo.resv, >+ r = reservation_object_wait_timeout_rcu(bo->tbo.base.resv, > true, false, MAX_SCHEDULE_TIMEOUT); > if (r <= 0) > DRM_ERROR("(%ld) failed to wait for user bo\n", r); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 2019-08-31 15:01:11.842736167 -0500 >@@ -80,14 +80,11 @@ > if (bo->pin_count > 0) > amdgpu_bo_subtract_pin_size(bo); > >- if (bo->kfd_bo) >- amdgpu_amdkfd_unreserve_memory_limit(bo); >- > amdgpu_bo_kunmap(bo); > >- if (bo->gem_base.import_attach) >- drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); >- drm_gem_object_release(&bo->gem_base); >+ if (bo->tbo.base.import_attach) >+ drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); >+ drm_gem_object_release(&bo->tbo.base); > /* in case amdgpu_device_recover_vram got NULL of bo->parent */ > if (!list_empty(&bo->shadow_list)) { > mutex_lock(&adev->shadow_list_lock); >@@ -249,8 +246,9 @@ > bp.size = size; > bp.byte_align = align; > bp.domain = domain; >- bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; >+ bp.flags = cpu_addr ? 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED >+ : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; >+ bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; > bp.type = ttm_bo_type_kernel; > bp.resv = NULL; > >@@ -413,6 +411,40 @@ > return false; > } > >+bool amdgpu_bo_support_uswc(u64 bo_flags) >+{ >+ >+#ifdef CONFIG_X86_32 >+ /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit >+ * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 >+ */ >+ return false; >+#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) >+ /* Don't try to enable write-combining when it can't work, or things >+ * may be slow >+ * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 >+ */ >+ >+#ifndef CONFIG_COMPILE_TEST >+#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ >+ thanks to write-combining >+#endif >+ >+ if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) >+ DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " >+ "better performance thanks to write-combining\n"); >+ return false; >+#else >+ /* For architectures that don't support WC memory, >+ * mask out the WC flag from the BO >+ */ >+ if (!drm_arch_can_wc_memory()) >+ return false; >+ >+ return true; >+#endif >+} >+ > static int amdgpu_bo_do_create(struct amdgpu_device *adev, > struct amdgpu_bo_param *bp, > struct amdgpu_bo **bo_ptr) >@@ -454,7 +486,7 @@ > bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL); > if (bo == NULL) > return -ENOMEM; >- drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); >+ drm_gem_private_object_init(adev->ddev, &bo->tbo.base, size); > INIT_LIST_HEAD(&bo->shadow_list); > bo->vm_bo = NULL; > bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : >@@ -466,33 +498,8 @@ > > bo->flags = bp->flags; > >-#ifdef CONFIG_X86_32 >- /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit >- * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 >- */ >- bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; >-#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) >- /* Don't try to enable write-combining when it can't work, or things >- * may be slow >- * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 >- */ >- >-#ifndef CONFIG_COMPILE_TEST >-#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ >- thanks to write-combining >-#endif >- >- if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) >- DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " >- "better performance thanks to write-combining\n"); >- bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; >-#else >- /* For architectures that don't support WC memory, >- * mask out the WC flag from the BO >- */ >- if (!drm_arch_can_wc_memory()) >+ if (!amdgpu_bo_support_uswc(bo->flags)) > bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; >-#endif > > bo->tbo.bdev = &adev->mman.bdev; > if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | >@@ -521,7 +528,7 @@ > bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { > struct dma_fence *fence; > >- r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); >+ r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence); > if (unlikely(r)) > goto fail_unreserve; > >@@ -544,7 +551,7 @@ > > fail_unreserve: > if (!bp->resv) >- ww_mutex_unlock(&bo->tbo.resv->lock); >+ reservation_object_unlock(bo->tbo.base.resv); > amdgpu_bo_unref(&bo); > return r; > } >@@ -565,7 +572,7 @@ > bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | > AMDGPU_GEM_CREATE_SHADOW; > bp.type = ttm_bo_type_kernel; >- bp.resv = bo->tbo.resv; >+ bp.resv = bo->tbo.base.resv; > > r = amdgpu_bo_do_create(adev, &bp, 
&bo->shadow);
> if (!r) {
>@@ -606,13 +613,13 @@
>
> if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
> if (!bp->resv)
>- WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
>+ WARN_ON(reservation_object_lock((*bo_ptr)->tbo.base.resv,
> NULL));
>
> r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
>
> if (!bp->resv)
>- reservation_object_unlock((*bo_ptr)->tbo.resv);
>+ reservation_object_unlock((*bo_ptr)->tbo.base.resv);
>
> if (r)
> amdgpu_bo_unref(bo_ptr);
>@@ -709,7 +716,7 @@
> return 0;
> }
>
>- r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
>+ r = reservation_object_wait_timeout_rcu(bo->tbo.base.resv, false, false,
> MAX_SCHEDULE_TIMEOUT);
> if (r < 0)
> return r;
>@@ -1087,7 +1094,7 @@
> */
> void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
> {
>- lockdep_assert_held(&bo->tbo.resv->lock.base);
>+ reservation_object_assert_held(bo->tbo.base.resv);
>
> if (tiling_flags)
> *tiling_flags = bo->tiling_flags;
>@@ -1212,6 +1219,42 @@
> }
>
> /**
>+ * amdgpu_bo_release_notify - notification about a BO being released
>+ * @bo: pointer to a buffer object
>+ *
>+ * Wipes VRAM buffers whose contents should not be leaked before the
>+ * memory is released.
>+ */
>+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>+{
>+ struct dma_fence *fence = NULL;
>+ struct amdgpu_bo *abo;
>+ int r;
>+
>+ if (!amdgpu_bo_is_amdgpu_bo(bo))
>+ return;
>+
>+ abo = ttm_to_amdgpu_bo(bo);
>+
>+ if (abo->kfd_bo)
>+ amdgpu_amdkfd_unreserve_memory_limit(abo);
>+
>+ if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
>+ !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
>+ return;
>+
>+ reservation_object_lock(bo->base.resv, NULL);
>+
>+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
>+ if (!WARN_ON(r)) {
>+ amdgpu_bo_fence(abo, fence, false);
>+ dma_fence_put(fence);
>+ }
>+
>+ reservation_object_unlock(bo->base.resv);
>+}
>+
>+/**
> * amdgpu_bo_fault_reserve_notify - notification about a memory fault
> * @bo: pointer to a buffer object
> *
>@@ -1283,7 +1326,7 @@
> void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
> bool shared)
> {
>- struct reservation_object *resv = bo->tbo.resv;
>+ struct reservation_object *resv = bo->tbo.base.resv;
>
> if (shared)
> reservation_object_add_shared_fence(resv, fence);
>@@ -1308,7 +1351,7 @@
> int r;
>
> amdgpu_sync_create(&sync);
>- amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false);
>+ amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, owner, false);
> r = amdgpu_sync_wait(&sync, intr);
> amdgpu_sync_free(&sync);
>
>@@ -1328,7 +1371,7 @@
> u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
> {
> WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>- WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>+ WARN_ON_ONCE(!reservation_object_is_locked(bo->tbo.base.resv) &&
> !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);
> WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
> WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 2019-08-31 15:01:11.842736167 -0500
>@@ -94,7 +94,6 @@
> /* per VM structure for page tables and with virtual addresses */
> struct amdgpu_vm_bo_base *vm_bo;
> /* Constant after initialization */
>- struct 
drm_gem_object gem_base; > struct amdgpu_bo *parent; > struct amdgpu_bo *shadow; > >@@ -192,7 +191,7 @@ > */ > static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) > { >- return drm_vma_node_offset_addr(&bo->tbo.vma_node); >+ return drm_vma_node_offset_addr(&bo->tbo.base.vma_node); > } > > /** >@@ -265,6 +264,7 @@ > void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, > bool evict, > struct ttm_mem_reg *new_mem); >+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); > int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); > void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, > bool shared); >@@ -308,5 +308,7 @@ > struct seq_file *m); > #endif > >+bool amdgpu_bo_support_uswc(u64 bo_flags); >+ > > #endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 2019-08-31 15:01:11.843736167 -0500 >@@ -325,13 +325,6 @@ > (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) > return -EINVAL; > >- if (!amdgpu_sriov_vf(adev)) { >- if (is_support_sw_smu(adev)) >- current_level = smu_get_performance_level(&adev->smu); >- else if (adev->powerplay.pp_funcs->get_performance_level) >- current_level = amdgpu_dpm_get_performance_level(adev); >- } >- > if (strncmp("low", buf, strlen("low")) == 0) { > level = AMD_DPM_FORCED_LEVEL_LOW; > } else if (strncmp("high", buf, strlen("high")) == 0) { >@@ -355,17 +348,23 @@ > goto fail; > } > >- if (amdgpu_sriov_vf(adev)) { >- if (amdgim_is_hwperf(adev) && >- adev->virt.ops->force_dpm_level) { >- mutex_lock(&adev->pm.mutex); >- adev->virt.ops->force_dpm_level(adev, level); >- mutex_unlock(&adev->pm.mutex); >- return count; >- } else { >- return -EINVAL; >+ /* handle sriov case here */ >+ if (amdgpu_sriov_vf(adev)) { >+ if (amdgim_is_hwperf(adev) && >+ adev->virt.ops->force_dpm_level) { >+ mutex_lock(&adev->pm.mutex); >+ adev->virt.ops->force_dpm_level(adev, level); >+ mutex_unlock(&adev->pm.mutex); >+ return count; >+ } else { >+ return -EINVAL; > } >- } >+ } >+ >+ if (is_support_sw_smu(adev)) >+ current_level = smu_get_performance_level(&adev->smu); >+ else if (adev->powerplay.pp_funcs->get_performance_level) >+ current_level = amdgpu_dpm_get_performance_level(adev); > > if (current_level == level) > return count; >@@ -746,10 +745,10 @@ > } > > /** >- * DOC: ppfeatures >+ * DOC: pp_features > * > * The amdgpu driver provides a sysfs API for adjusting what powerplay >- * features to be enabled. The file ppfeatures is used for this. And >+ * features to be enabled. The file pp_features is used for this. And > * this is only available for Vega10 and later dGPUs. > * > * Reading back the file will show you the followings: >@@ -761,7 +760,7 @@ > * the corresponding bit from original ppfeature masks and input the > * new ppfeature masks. 
> */ >-static ssize_t amdgpu_set_ppfeature_status(struct device *dev, >+static ssize_t amdgpu_set_pp_feature_status(struct device *dev, > struct device_attribute *attr, > const char *buf, > size_t count) >@@ -778,7 +777,7 @@ > pr_debug("featuremask = 0x%llx\n", featuremask); > > if (is_support_sw_smu(adev)) { >- ret = smu_set_ppfeature_status(&adev->smu, featuremask); >+ ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); > if (ret) > return -EINVAL; > } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { >@@ -790,7 +789,7 @@ > return count; > } > >-static ssize_t amdgpu_get_ppfeature_status(struct device *dev, >+static ssize_t amdgpu_get_pp_feature_status(struct device *dev, > struct device_attribute *attr, > char *buf) > { >@@ -798,7 +797,7 @@ > struct amdgpu_device *adev = ddev->dev_private; > > if (is_support_sw_smu(adev)) { >- return smu_get_ppfeature_status(&adev->smu, buf); >+ return smu_sys_get_pp_feature_mask(&adev->smu, buf); > } else if (adev->powerplay.pp_funcs->get_ppfeature_status) > return amdgpu_dpm_get_ppfeature_status(adev, buf); > >@@ -1458,9 +1457,9 @@ > static DEVICE_ATTR(mem_busy_percent, S_IRUGO, > amdgpu_get_memory_busy_percent, NULL); > static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); >-static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, >- amdgpu_get_ppfeature_status, >- amdgpu_set_ppfeature_status); >+static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR, >+ amdgpu_get_pp_feature_status, >+ amdgpu_set_pp_feature_status); > static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); > > static ssize_t amdgpu_hwmon_show_temp(struct device *dev, >@@ -1625,20 +1624,16 @@ > (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) > return -EINVAL; > >- if (is_support_sw_smu(adev)) { >- err = kstrtoint(buf, 10, &value); >- if (err) >- return err; >+ err = kstrtoint(buf, 10, &value); >+ if (err) >+ return err; > >+ if (is_support_sw_smu(adev)) { > smu_set_fan_control_mode(&adev->smu, value); > } else { > if (!adev->powerplay.pp_funcs->set_fan_control_mode) > return -EINVAL; > >- err = kstrtoint(buf, 10, &value); >- if (err) >- return err; >- > amdgpu_dpm_set_fan_control_mode(adev, value); > } > >@@ -2058,16 +2053,18 @@ > return err; > > value = value / 1000000; /* convert to Watt */ >+ > if (is_support_sw_smu(adev)) { >- adev->smu.funcs->set_power_limit(&adev->smu, value); >+ err = smu_set_power_limit(&adev->smu, value); > } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { > err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); >- if (err) >- return err; > } else { >- return -EINVAL; >+ err = -EINVAL; > } > >+ if (err) >+ return err; >+ > return count; > } > >@@ -2352,7 +2349,9 @@ > effective_mode &= ~S_IWUSR; > } > >- if ((adev->flags & AMD_IS_APU) && >+ if (((adev->flags & AMD_IS_APU) || >+ adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ >+ adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */ > (attr == &sensor_dev_attr_power1_average.dev_attr.attr || > attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || > attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| >@@ -2376,6 +2375,12 @@ > return 0; > } > >+ if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ >+ adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */ >+ (attr == &sensor_dev_attr_in0_input.dev_attr.attr || >+ attr == &sensor_dev_attr_in0_label.dev_attr.attr)) >+ return 0; >+ > /* only APUs have vddnb */ > if (!(adev->flags & AMD_IS_APU) && > (attr == 
&sensor_dev_attr_in1_input.dev_attr.attr || >@@ -2831,10 +2836,12 @@ > DRM_ERROR("failed to create device file pp_dpm_socclk\n"); > return ret; > } >- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); >- if (ret) { >- DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); >- return ret; >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); >+ if (ret) { >+ DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); >+ return ret; >+ } > } > } > if (adev->asic_type >= CHIP_VEGA20) { >@@ -2844,10 +2851,12 @@ > return ret; > } > } >- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); >- if (ret) { >- DRM_ERROR("failed to create device file pp_dpm_pcie\n"); >- return ret; >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); >+ if (ret) { >+ DRM_ERROR("failed to create device file pp_dpm_pcie\n"); >+ return ret; >+ } > } > ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od); > if (ret) { >@@ -2917,10 +2926,10 @@ > if ((adev->asic_type >= CHIP_VEGA10) && > !(adev->flags & AMD_IS_APU)) { > ret = device_create_file(adev->dev, >- &dev_attr_ppfeatures); >+ &dev_attr_pp_features); > if (ret) { > DRM_ERROR("failed to create device file " >- "ppfeatures\n"); >+ "pp_features\n"); > return ret; > } > } >@@ -2951,9 +2960,11 @@ > device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); > if (adev->asic_type >= CHIP_VEGA10) { > device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); >- device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); >+ if (adev->asic_type != CHIP_ARCTURUS) >+ device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); > } >- device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); >+ if (adev->asic_type != CHIP_ARCTURUS) >+ device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); > if (adev->asic_type >= CHIP_VEGA20) > device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk); > device_remove_file(adev->dev, &dev_attr_pp_sclk_od); >@@ -2974,7 +2985,7 @@ > device_remove_file(adev->dev, &dev_attr_unique_id); > if ((adev->asic_type >= CHIP_VEGA10) && > !(adev->flags & AMD_IS_APU)) >- device_remove_file(adev->dev, &dev_attr_ppfeatures); >+ device_remove_file(adev->dev, &dev_attr_pp_features); > } > > void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 2019-08-31 15:01:11.843736167 -0500 >@@ -32,6 +32,7 @@ > #include "psp_v3_1.h" > #include "psp_v10_0.h" > #include "psp_v11_0.h" >+#include "psp_v12_0.h" > > static void psp_set_funcs(struct amdgpu_device *adev); > >@@ -53,13 +54,19 @@ > psp->autoload_supported = false; > break; > case CHIP_VEGA20: >+ case CHIP_ARCTURUS: > psp_v11_0_set_psp_funcs(psp); > psp->autoload_supported = false; > break; > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > psp_v11_0_set_psp_funcs(psp); > psp->autoload_supported = true; > break; >+ case CHIP_RENOIR: >+ psp_v12_0_set_psp_funcs(psp); >+ break; > default: > return -EINVAL; > } >@@ -137,8 +144,7 @@ > memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); > > index = atomic_inc_return(&psp->fence_value); >- ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, >- fence_mc_addr, index); >+ ret = psp_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index); > if (ret) { 
> atomic_dec(&psp->fence_value); > mutex_unlock(&psp->mutex); >@@ -162,8 +168,8 @@ > if (ucode) > DRM_WARN("failed to load ucode id (%d) ", > ucode->ucode_id); >- DRM_WARN("psp command failed and response status is (%d)\n", >- psp->cmd_buf_mem->resp.status); >+ DRM_WARN("psp command failed and response status is (0x%X)\n", >+ psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); > if (!timeout) { > mutex_unlock(&psp->mutex); > return -EINVAL; >@@ -233,6 +239,8 @@ > { > int ret; > int tmr_size; >+ void *tmr_buf; >+ void **pptr; > > /* > * According to HW engineer, they prefer the TMR address be "naturally >@@ -255,9 +263,10 @@ > } > } > >+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; > ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE, > AMDGPU_GEM_DOMAIN_VRAM, >- &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); >+ &psp->tmr_bo, &psp->tmr_mc_addr, pptr); > > return ret; > } >@@ -831,7 +840,6 @@ > "XGMI: Failed to initialize XGMI session\n"); > } > >- > if (psp->adev->psp.ta_fw) { > ret = psp_ras_initialize(psp); > if (ret) >@@ -852,6 +860,24 @@ > case AMDGPU_UCODE_ID_SDMA1: > *type = GFX_FW_TYPE_SDMA1; > break; >+ case AMDGPU_UCODE_ID_SDMA2: >+ *type = GFX_FW_TYPE_SDMA2; >+ break; >+ case AMDGPU_UCODE_ID_SDMA3: >+ *type = GFX_FW_TYPE_SDMA3; >+ break; >+ case AMDGPU_UCODE_ID_SDMA4: >+ *type = GFX_FW_TYPE_SDMA4; >+ break; >+ case AMDGPU_UCODE_ID_SDMA5: >+ *type = GFX_FW_TYPE_SDMA5; >+ break; >+ case AMDGPU_UCODE_ID_SDMA6: >+ *type = GFX_FW_TYPE_SDMA6; >+ break; >+ case AMDGPU_UCODE_ID_SDMA7: >+ *type = GFX_FW_TYPE_SDMA7; >+ break; > case AMDGPU_UCODE_ID_CP_CE: > *type = GFX_FW_TYPE_CP_CE; > break; >@@ -920,6 +946,60 @@ > return 0; > } > >+static void psp_print_fw_hdr(struct psp_context *psp, >+ struct amdgpu_firmware_info *ucode) >+{ >+ struct amdgpu_device *adev = psp->adev; >+ const struct sdma_firmware_header_v1_0 *sdma_hdr = >+ (const struct sdma_firmware_header_v1_0 *) >+ adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; >+ const struct gfx_firmware_header_v1_0 *ce_hdr = >+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; >+ const struct gfx_firmware_header_v1_0 *pfp_hdr = >+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; >+ const struct gfx_firmware_header_v1_0 *me_hdr = >+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; >+ const struct gfx_firmware_header_v1_0 *mec_hdr = >+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; >+ const struct rlc_firmware_header_v2_0 *rlc_hdr = >+ (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; >+ const struct smc_firmware_header_v1_0 *smc_hdr = >+ (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; >+ >+ switch (ucode->ucode_id) { >+ case AMDGPU_UCODE_ID_SDMA0: >+ case AMDGPU_UCODE_ID_SDMA1: >+ case AMDGPU_UCODE_ID_SDMA2: >+ case AMDGPU_UCODE_ID_SDMA3: >+ case AMDGPU_UCODE_ID_SDMA4: >+ case AMDGPU_UCODE_ID_SDMA5: >+ case AMDGPU_UCODE_ID_SDMA6: >+ case AMDGPU_UCODE_ID_SDMA7: >+ amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header); >+ break; >+ case AMDGPU_UCODE_ID_CP_CE: >+ amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); >+ break; >+ case AMDGPU_UCODE_ID_CP_PFP: >+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); >+ break; >+ case AMDGPU_UCODE_ID_CP_ME: >+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header); >+ break; >+ case AMDGPU_UCODE_ID_CP_MEC1: >+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); >+ break; >+ case AMDGPU_UCODE_ID_RLC_G: >+ amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header); >+ break; >+ case AMDGPU_UCODE_ID_SMC: >+ 
amdgpu_ucode_print_smc_hdr(&smc_hdr->header); >+ break; >+ default: >+ break; >+ } >+} >+ > static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, > struct psp_gfx_cmd_resp *cmd) > { >@@ -980,17 +1060,31 @@ > if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && > (psp_smu_reload_quirk(psp) || psp->autoload_supported)) > continue; >+ > if (amdgpu_sriov_vf(adev) && > (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 > || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 >+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 >+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3 >+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4 >+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 >+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 >+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 > || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) > /*skip ucode loading in SRIOV VF */ > continue; >+ > if (psp->autoload_supported && > (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT || > ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)) > /* skip mec JT when autoload is enabled */ > continue; >+ /* Renoir only needs to load mec jump table one time */ >+ if (adev->asic_type == CHIP_RENOIR && >+ ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) >+ continue; >+ >+ psp_print_fw_hdr(psp, ucode); > > ret = psp_execute_np_fw_load(psp, ucode); > if (ret) >@@ -1115,6 +1209,8 @@ > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > struct psp_context *psp = &adev->psp; >+ void *tmr_buf; >+ void **pptr; > > if (adev->gmc.xgmi.num_physical_nodes > 1 && > psp->xgmi_context.initialized == 1) >@@ -1125,7 +1221,8 @@ > > psp_ring_destroy(psp, PSP_RING_TYPE__KM); > >- amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); >+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; >+ amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); > amdgpu_bo_free_kernel(&psp->fw_pri_bo, > &psp->fw_pri_mc_addr, &psp->fw_pri_buf); > amdgpu_bo_free_kernel(&psp->fence_buf_bo, >@@ -1328,4 +1425,13 @@ > .minor = 0, > .rev = 0, > .funcs = &psp_ip_funcs, >+}; >+ >+const struct amdgpu_ip_block_version psp_v12_0_ip_block = >+{ >+ .type = AMD_IP_BLOCK_TYPE_PSP, >+ .major = 12, >+ .minor = 0, >+ .rev = 0, >+ .funcs = &psp_ip_funcs, > }; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 2019-08-31 15:01:11.843736167 -0500 >@@ -90,7 +90,6 @@ > int (*ring_destroy)(struct psp_context *psp, > enum psp_ring_type ring_type); > int (*cmd_submit)(struct psp_context *psp, >- struct amdgpu_firmware_info *ucode, > uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, > int index); > bool (*compare_sram_data)(struct psp_context *psp, >@@ -172,7 +171,6 @@ > /* tmr buffer */ > struct amdgpu_bo *tmr_bo; > uint64_t tmr_mc_addr; >- void *tmr_buf; > > /* asd firmware and buffer */ > const struct firmware *asd_fw; >@@ -223,8 +221,8 @@ > #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) > #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) > #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) >-#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ >- (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) >+#define psp_cmd_submit(psp, cmd_mc, fence_mc, index) \ >+ (psp)->funcs->cmd_submit((psp), (cmd_mc), (fence_mc), (index)) > #define 
psp_compare_sram_data(psp, ucode, type) \ > (psp)->funcs->compare_sram_data((psp), (ucode), (type)) > #define psp_init_microcode(psp) \ >@@ -270,6 +268,7 @@ > uint32_t field_val, uint32_t mask, bool check_changed); > > extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; >+extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; > > int psp_gpu_reset(struct amdgpu_device *adev); > int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 2019-08-31 15:01:11.843736167 -0500 >@@ -30,74 +30,6 @@ > #include "amdgpu_ras.h" > #include "amdgpu_atomfirmware.h" > >-struct ras_ih_data { >- /* interrupt bottom half */ >- struct work_struct ih_work; >- int inuse; >- /* IP callback */ >- ras_ih_cb cb; >- /* full of entries */ >- unsigned char *ring; >- unsigned int ring_size; >- unsigned int element_size; >- unsigned int aligned_element_size; >- unsigned int rptr; >- unsigned int wptr; >-}; >- >-struct ras_fs_data { >- char sysfs_name[32]; >- char debugfs_name[32]; >-}; >- >-struct ras_err_data { >- unsigned long ue_count; >- unsigned long ce_count; >-}; >- >-struct ras_err_handler_data { >- /* point to bad pages array */ >- struct { >- unsigned long bp; >- struct amdgpu_bo *bo; >- } *bps; >- /* the count of entries */ >- int count; >- /* the space can place new entries */ >- int space_left; >- /* last reserved entry's index + 1 */ >- int last_reserved; >-}; >- >-struct ras_manager { >- struct ras_common_if head; >- /* reference count */ >- int use; >- /* ras block link */ >- struct list_head node; >- /* the device */ >- struct amdgpu_device *adev; >- /* debugfs */ >- struct dentry *ent; >- /* sysfs */ >- struct device_attribute sysfs_attr; >- int attr_inuse; >- >- /* fs node name */ >- struct ras_fs_data fs_data; >- >- /* IH data */ >- struct ras_ih_data ih_data; >- >- struct ras_err_data err_data; >-}; >- >-struct ras_badpage { >- unsigned int bp; >- unsigned int size; >- unsigned int flags; >-}; >- > const char *ras_error_string[] = { > "none", > "parity", >@@ -130,6 +62,9 @@ > #define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 > #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) > >+/* inject address is 52 bits */ >+#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) >+ > static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, > uint64_t offset, uint64_t size, > struct amdgpu_bo **bo_ptr); >@@ -196,6 +131,7 @@ > char err[9] = "ue"; > int op = -1; > int block_id; >+ uint32_t sub_block; > u64 address, value; > > if (*pos) >@@ -223,17 +159,23 @@ > return -EINVAL; > > data->head.block = block_id; >- data->head.type = memcmp("ue", err, 2) == 0 ? 
>- AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE :
>- AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
>+ /* only ue and ce errors are supported */
>+ if (!memcmp("ue", err, 2))
>+ data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
>+ else if (!memcmp("ce", err, 2))
>+ data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
>+ else
>+ return -EINVAL;
>+
> data->op = op;
>
> if (op == 2) {
>- if (sscanf(str, "%*s %*s %*s %llu %llu",
>- &address, &value) != 2)
>- if (sscanf(str, "%*s %*s %*s 0x%llx 0x%llx",
>- &address, &value) != 2)
>+ if (sscanf(str, "%*s %*s %*s %u %llu %llu",
>+ &sub_block, &address, &value) != 3)
>+ if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
>+ &sub_block, &address, &value) != 3)
> return -EINVAL;
>+ data->head.sub_block_index = sub_block;
> data->inject.address = address;
> data->inject.value = value;
> }
>@@ -278,7 +220,7 @@
> * write the struct to the control node.
> *
> * bash:
>- * echo op block [error [address value]] > .../ras/ras_ctrl
>+ * echo op block [error [sub_block address value]] > .../ras/ras_ctrl
> * op: disable, enable, inject
> * disable: only block is needed
> * enable: block and error are needed
>@@ -288,10 +230,11 @@
> * error: ue, ce
> * ue: multi_uncorrectable
> * ce: single_correctable
>+ * sub_block: sub block index, pass 0 if there is no sub block
> *
> * here are some examples for bash commands,
>- * echo inject umc ue 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
>- * echo inject umc ce 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
>+ * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
>+ * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
> * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
> *
> * How to check the result?
>@@ -310,7 +253,6 @@
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
> struct ras_debug_if data;
>- struct amdgpu_bo *bo;
> int ret = 0;
>
> ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
>@@ -328,17 +270,14 @@
> ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
> break;
> case 2:
>- ret = amdgpu_ras_reserve_vram(adev,
>- data.inject.address, PAGE_SIZE, &bo);
>- if (ret) {
>- /* address was offset, now it is absolute.*/
>- data.inject.address += adev->gmc.vram_start;
>- if (data.inject.address > adev->gmc.vram_end)
>- break;
>- } else
>- data.inject.address = amdgpu_bo_gpu_offset(bo);
>+ if ((data.inject.address >= adev->gmc.mc_vram_size) ||
>+ (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
>+ ret = -EINVAL;
>+ break;
>+ }
>+
>+ /* data.inject.address is offset instead of absolute gpu address */
> ret = amdgpu_ras_error_inject(adev, &data.inject);
>- amdgpu_ras_release_vram(adev, &bo);
> break;
> default:
> ret = -EINVAL;
>@@ -656,14 +595,46 @@
> struct ras_query_if *info)
> {
> struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
>+ struct ras_err_data err_data = {0, 0, 0, NULL};
>
> if (!obj)
> return -EINVAL;
>- /* TODO might read the register to read the count */
>+
>+ switch (info->head.block) {
>+ case AMDGPU_RAS_BLOCK__UMC:
>+ if (adev->umc.funcs->query_ras_error_count)
>+ adev->umc.funcs->query_ras_error_count(adev, &err_data);
>+ /* umc query_ras_error_address is also responsible for clearing
>+ * error status
>+ */
>+ if (adev->umc.funcs->query_ras_error_address)
>+ adev->umc.funcs->query_ras_error_address(adev, &err_data);
>+ break;
>+ case AMDGPU_RAS_BLOCK__GFX:
>+ if (adev->gfx.funcs->query_ras_error_count)
>+ adev->gfx.funcs->query_ras_error_count(adev, &err_data);
>+ break;
>+ case 
AMDGPU_RAS_BLOCK__MMHUB: >+ if (adev->mmhub_funcs->query_ras_error_count) >+ adev->mmhub_funcs->query_ras_error_count(adev, &err_data); >+ break; >+ default: >+ break; >+ } >+ >+ obj->err_data.ue_count += err_data.ue_count; >+ obj->err_data.ce_count += err_data.ce_count; > > info->ue_count = obj->err_data.ue_count; > info->ce_count = obj->err_data.ce_count; > >+ if (err_data.ce_count) >+ dev_info(adev->dev, "%ld correctable errors detected in %s block\n", >+ obj->err_data.ce_count, ras_block_str(info->head.block)); >+ if (err_data.ue_count) >+ dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", >+ obj->err_data.ue_count, ras_block_str(info->head.block)); >+ > return 0; > } > >@@ -684,13 +655,23 @@ > if (!obj) > return -EINVAL; > >- if (block_info.block_id != TA_RAS_BLOCK__UMC) { >+ switch (info->head.block) { >+ case AMDGPU_RAS_BLOCK__GFX: >+ if (adev->gfx.funcs->ras_error_inject) >+ ret = adev->gfx.funcs->ras_error_inject(adev, info); >+ else >+ ret = -EINVAL; >+ break; >+ case AMDGPU_RAS_BLOCK__UMC: >+ case AMDGPU_RAS_BLOCK__MMHUB: >+ ret = psp_ras_trigger_error(&adev->psp, &block_info); >+ break; >+ default: > DRM_INFO("%s error injection is not supported yet\n", > ras_block_str(info->head.block)); >- return -EINVAL; >+ ret = -EINVAL; > } > >- ret = psp_ras_trigger_error(&adev->psp, &block_info); > if (ret) > DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n", > ras_block_str(info->head.block), >@@ -707,7 +688,7 @@ > } > > /* get the total error counts on all IPs */ >-int amdgpu_ras_query_error_count(struct amdgpu_device *adev, >+unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, > bool is_ce) > { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); >@@ -715,7 +696,7 @@ > struct ras_err_data data = {0, 0}; > > if (!con) >- return -EINVAL; >+ return 0; > > list_for_each_entry(obj, &con->head, node) { > struct ras_query_if info = { >@@ -723,7 +704,7 @@ > }; > > if (amdgpu_ras_error_query(adev, &info)) >- return -EINVAL; >+ return 0; > > data.ce_count += info.ce_count; > data.ue_count += info.ue_count; >@@ -812,32 +793,8 @@ > { > struct amdgpu_ras *con = > container_of(attr, struct amdgpu_ras, features_attr); >- struct drm_device *ddev = dev_get_drvdata(dev); >- struct amdgpu_device *adev = ddev->dev_private; >- struct ras_common_if head; >- int ras_block_count = AMDGPU_RAS_BLOCK_COUNT; >- int i; >- ssize_t s; >- struct ras_manager *obj; >- >- s = scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); > >- for (i = 0; i < ras_block_count; i++) { >- head.block = i; >- >- if (amdgpu_ras_is_feature_enabled(adev, &head)) { >- obj = amdgpu_ras_find_obj(adev, &head); >- s += scnprintf(&buf[s], PAGE_SIZE - s, >- "%s: %s\n", >- ras_block_str(i), >- ras_err_str(obj->head.type)); >- } else >- s += scnprintf(&buf[s], PAGE_SIZE - s, >- "%s: disabled\n", >- ras_block_str(i)); >- } >- >- return s; >+ return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); > } > > static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) >@@ -1054,6 +1011,7 @@ > struct ras_ih_data *data = &obj->ih_data; > struct amdgpu_iv_entry entry; > int ret; >+ struct ras_err_data err_data = {0, 0, 0, NULL}; > > while (data->rptr != data->wptr) { > rmb(); >@@ -1068,19 +1026,19 @@ > * from the callback to udpate the error type/count, etc > */ > if (data->cb) { >- ret = data->cb(obj->adev, &entry); >+ ret = data->cb(obj->adev, &err_data, &entry); > /* ue will trigger an interrupt, and in that case > * we need do a reset to recovery the whole 
system.
> * But leave IP do that recovery, here we just dispatch
> * the error.
> */
>- if (ret == AMDGPU_RAS_UE) {
>- obj->err_data.ue_count++;
>+ if (ret == AMDGPU_RAS_SUCCESS) {
>+ /* these counts could be left as 0 if
>+ * some blocks do not count error number
>+ */
>+ obj->err_data.ue_count += err_data.ue_count;
>+ obj->err_data.ce_count += err_data.ce_count;
> }
>- /* Might need get ce count by register, but not all IP
>- * saves ce count, some IP just use one bit or two bits
>- * to indicate ce happened.
>- */
> }
> }
> }
>@@ -1577,6 +1535,10 @@
> if (amdgpu_ras_fs_init(adev))
> goto fs_out;
>
>+ /* ras init for each ras block */
>+ if (adev->umc.funcs->ras_init)
>+ adev->umc.funcs->ras_init(adev);
>+
> DRM_INFO("RAS INFO: ras initialized successfully, "
> "hardware ability[%x] ras_mask[%x]\n",
> con->hw_supported, con->supported);
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 1969-12-31 18:00:00.000000000 -0600
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 2019-08-31 15:01:11.843736167 -0500
>@@ -0,0 +1,493 @@
>+/*
>+ * Copyright 2019 Advanced Micro Devices, Inc.
>+ *
>+ * Permission is hereby granted, free of charge, to any person obtaining a
>+ * copy of this software and associated documentation files (the "Software"),
>+ * to deal in the Software without restriction, including without limitation
>+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>+ * and/or sell copies of the Software, and to permit persons to whom the
>+ * Software is furnished to do so, subject to the following conditions:
>+ *
>+ * The above copyright notice and this permission notice shall be included in
>+ * all copies or substantial portions of the Software.
>+ *
>+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>+ * OTHER DEALINGS IN THE SOFTWARE.
>+ *
>+ */
>+
>+#include "amdgpu_ras_eeprom.h"
>+#include "amdgpu.h"
>+#include "amdgpu_ras.h"
>+#include <linux/bits.h>
>+#include "smu_v11_0_i2c.h"
>+
>+#define EEPROM_I2C_TARGET_ADDR 0xA0
>+
>+/*
>+ * The 2 macros below represent the actual size in bytes that
>+ * those entities occupy in the EEPROM memory.
>+ * EEPROM_TABLE_RECORD_SIZE is different from sizeof(eeprom_table_record) which
>+ * uses uint64 to store 6b fields such as retired_page. 
>+ */ >+#define EEPROM_TABLE_HEADER_SIZE 20 >+#define EEPROM_TABLE_RECORD_SIZE 24 >+ >+#define EEPROM_ADDRESS_SIZE 0x2 >+ >+/* Table hdr is 'AMDR' */ >+#define EEPROM_TABLE_HDR_VAL 0x414d4452 >+#define EEPROM_TABLE_VER 0x00010000 >+ >+/* Assume 2 Mbit size */ >+#define EEPROM_SIZE_BYTES 256000 >+#define EEPROM_PAGE__SIZE_BYTES 256 >+#define EEPROM_HDR_START 0 >+#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE) >+#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE) >+#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8) >+ >+#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev >+ >+static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr, >+ unsigned char *buff) >+{ >+ uint32_t *pp = (uint32_t *) buff; >+ >+ pp[0] = cpu_to_le32(hdr->header); >+ pp[1] = cpu_to_le32(hdr->version); >+ pp[2] = cpu_to_le32(hdr->first_rec_offset); >+ pp[3] = cpu_to_le32(hdr->tbl_size); >+ pp[4] = cpu_to_le32(hdr->checksum); >+} >+ >+static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr, >+ unsigned char *buff) >+{ >+ uint32_t *pp = (uint32_t *)buff; >+ >+ hdr->header = le32_to_cpu(pp[0]); >+ hdr->version = le32_to_cpu(pp[1]); >+ hdr->first_rec_offset = le32_to_cpu(pp[2]); >+ hdr->tbl_size = le32_to_cpu(pp[3]); >+ hdr->checksum = le32_to_cpu(pp[4]); >+} >+ >+static int __update_table_header(struct amdgpu_ras_eeprom_control *control, >+ unsigned char *buff) >+{ >+ int ret = 0; >+ struct i2c_msg msg = { >+ .addr = EEPROM_I2C_TARGET_ADDR, >+ .flags = 0, >+ .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, >+ .buf = buff, >+ }; >+ >+ >+ *(uint16_t *)buff = EEPROM_HDR_START; >+ __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); >+ >+ ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); >+ if (ret < 1) >+ DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); >+ >+ return ret; >+} >+ >+static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control); >+ >+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) >+{ >+ int ret = 0; >+ struct amdgpu_device *adev = to_amdgpu_device(control); >+ unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; >+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; >+ struct i2c_msg msg = { >+ .addr = EEPROM_I2C_TARGET_ADDR, >+ .flags = I2C_M_RD, >+ .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, >+ .buf = buff, >+ }; >+ >+ mutex_init(&control->tbl_mutex); >+ >+ switch (adev->asic_type) { >+ case CHIP_VEGA20: >+ ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); >+ break; >+ >+ default: >+ return 0; >+ } >+ >+ if (ret) { >+ DRM_ERROR("Failed to init I2C controller, ret:%d", ret); >+ return ret; >+ } >+ >+ /* Read/Create table header from EEPROM address 0 */ >+ ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); >+ if (ret < 1) { >+ DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); >+ return ret; >+ } >+ >+ __decode_table_header_from_buff(hdr, &buff[2]); >+ >+ if (hdr->header == EEPROM_TABLE_HDR_VAL) { >+ control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / >+ EEPROM_TABLE_RECORD_SIZE; >+ DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", >+ control->num_recs); >+ >+ } else { >+ DRM_INFO("Creating new EEPROM table"); >+ >+ hdr->header = EEPROM_TABLE_HDR_VAL; >+ hdr->version = EEPROM_TABLE_VER; >+ hdr->first_rec_offset = EEPROM_RECORD_START; >+ hdr->tbl_size = 
EEPROM_TABLE_HEADER_SIZE;
>+
>+ adev->psp.ras.ras->eeprom_control.tbl_byte_sum =
>+ __calc_hdr_byte_sum(&adev->psp.ras.ras->eeprom_control);
>+ ret = __update_table_header(control, buff);
>+ }
>+
>+ /* Start inserting records from here */
>+ adev->psp.ras.ras->eeprom_control.next_addr = EEPROM_RECORD_START;
>+
>+ return ret == 1 ? 0 : -EIO;
>+}
>+
>+void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control)
>+{
>+ struct amdgpu_device *adev = to_amdgpu_device(control);
>+
>+ switch (adev->asic_type) {
>+ case CHIP_VEGA20:
>+ smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor);
>+ break;
>+
>+ default:
>+ return;
>+ }
>+}
>+
>+static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control,
>+ struct eeprom_table_record *record,
>+ unsigned char *buff)
>+{
>+ __le64 tmp = 0;
>+ int i = 0;
>+
>+ /* Next are all record fields according to EEPROM page spec in LE format */
>+ buff[i++] = record->err_type;
>+
>+ buff[i++] = record->bank;
>+
>+ tmp = cpu_to_le64(record->ts);
>+ memcpy(buff + i, &tmp, 8);
>+ i += 8;
>+
>+ tmp = cpu_to_le64((record->offset & 0xffffffffffff));
>+ memcpy(buff + i, &tmp, 6);
>+ i += 6;
>+
>+ buff[i++] = record->mem_channel;
>+ buff[i++] = record->mcumc_id;
>+
>+ tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
>+ memcpy(buff + i, &tmp, 6);
>+}
>+
>+static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control,
>+ struct eeprom_table_record *record,
>+ unsigned char *buff)
>+{
>+ __le64 tmp = 0;
>+ int i = 0;
>+
>+ /* Next are all record fields according to EEPROM page spec in LE format */
>+ record->err_type = buff[i++];
>+
>+ record->bank = buff[i++];
>+
>+ memcpy(&tmp, buff + i, 8);
>+ record->ts = le64_to_cpu(tmp);
>+ i += 8;
>+
>+ memcpy(&tmp, buff + i, 6);
>+ record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
>+ i += 6;
>+
>+ record->mem_channel = buff[i++];
>+ record->mcumc_id = buff[i++];
>+
>+ memcpy(&tmp, buff + i, 6);
>+ record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
>+}
>+
>+/*
>+ * When reaching end of EEPROM memory jump back to 0 record address
>+ * When next record access will go beyond EEPROM page boundary modify bits A17/A8
>+ * in I2C selector to go to next page
>+ */
>+static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
>+{
>+ uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE;
>+
>+ /* When all EEPROM memory used jump back to 0 address */
>+ if (next_address > EEPROM_SIZE_BYTES) {
>+ DRM_INFO("Reached end of EEPROM memory, jumping to 0 "
>+ "and overriding old record");
>+ return EEPROM_RECORD_START;
>+ }
>+
>+ /*
>+ * To check if we overflow page boundary compare next address with
>+ * current and see if bits 17/8 of the EEPROM address will change
>+ * If they do start from the next 256b page
>+ *
>+ * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 
5.1.2
>+ */
>+ if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) {
>+ DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx",
>+ (next_address & EEPROM_ADDR_MSB_MASK));
>+
>+ return (next_address & EEPROM_ADDR_MSB_MASK);
>+ }
>+
>+ return curr_address;
>+}
>+
>+
>+static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
>+{
>+ int i;
>+ uint32_t tbl_sum = 0;
>+
>+ /* Header checksum, skip checksum field in the calculation */
>+ for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
>+ tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
>+
>+ return tbl_sum;
>+}
>+
>+static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records,
>+ int num)
>+{
>+ int i, j;
>+ uint32_t tbl_sum = 0;
>+
>+ /* Records checksum */
>+ for (i = 0; i < num; i++) {
>+ struct eeprom_table_record *record = &records[i];
>+
>+ for (j = 0; j < sizeof(*record); j++) {
>+ tbl_sum += *(((unsigned char *)record) + j);
>+ }
>+ }
>+
>+ return tbl_sum;
>+}
>+
>+static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
>+ struct eeprom_table_record *records, int num)
>+{
>+ return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
>+}
>+
>+/* Checksum = 256 -((sum of all table entries) mod 256) */
>+static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
>+ struct eeprom_table_record *records, int num,
>+ uint32_t old_hdr_byte_sum)
>+{
>+ /*
>+ * This will update the table sum with new records.
>+ *
>+ * TODO: What happens when the EEPROM table is to be wrapped around
>+ * and old records from start will get overridden.
>+ */
>+
>+ /* need to recalculate updated header byte sum */
>+ control->tbl_byte_sum -= old_hdr_byte_sum;
>+ control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
>+
>+ control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
>+}
>+
>+/* table sum mod 256 + checksum must equal 256 */
>+static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
>+ struct eeprom_table_record *records, int num)
>+{
>+ control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
>+
>+ if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
>+ DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
>+ return false;
>+ }
>+
>+ return true;
>+}
>+
>+int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
>+ struct eeprom_table_record *records,
>+ bool write,
>+ int num)
>+{
>+ int i, ret = 0;
>+ struct i2c_msg *msgs;
>+ unsigned char *buffs;
>+ struct amdgpu_device *adev = to_amdgpu_device(control);
>+
>+ if (adev->asic_type != CHIP_VEGA20)
>+ return 0;
>+
>+ buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,
>+ GFP_KERNEL);
>+ if (!buffs)
>+ return -ENOMEM;
>+
>+ mutex_lock(&control->tbl_mutex);
>+
>+ msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL);
>+ if (!msgs) {
>+ ret = -ENOMEM;
>+ goto free_buff;
>+ }
>+
>+ /* In case of overflow just start from beginning to not lose newest records */
>+ if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES))
>+ control->next_addr = EEPROM_RECORD_START;
>+
>+
>+ /*
>+ * TODO Currently makes EEPROM writes for each record, this creates
>+ * internal fragmentation. 
Optimize the code to do full page write of
>+ * 256b
>+ */
>+ for (i = 0; i < num; i++) {
>+ unsigned char *buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
>+ struct eeprom_table_record *record = &records[i];
>+ struct i2c_msg *msg = &msgs[i];
>+
>+ control->next_addr = __correct_eeprom_dest_address(control->next_addr);
>+
>+ /*
>+ * Update bits 16,17 of EEPROM address in I2C address by setting them
>+ * to bits 1,2 of Device address byte
>+ */
>+ msg->addr = EEPROM_I2C_TARGET_ADDR |
>+ ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
>+ msg->flags = write ? 0 : I2C_M_RD;
>+ msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
>+ msg->buf = buff;
>+
>+ /* Insert the EEPROM dest address, bits 0-15 */
>+ buff[0] = ((control->next_addr >> 8) & 0xff);
>+ buff[1] = (control->next_addr & 0xff);
>+
>+ /* EEPROM table content is stored in LE format */
>+ if (write)
>+ __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
>+
>+ /*
>+ * The destination EEPROM address might need to be corrected to account
>+ * for page or entire memory wrapping
>+ */
>+ control->next_addr += EEPROM_TABLE_RECORD_SIZE;
>+ }
>+
>+ ret = i2c_transfer(&control->eeprom_accessor, msgs, num);
>+ if (ret < 1) {
>+ DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret);
>+
>+ /* TODO Restore prev next EEPROM address ? */
>+ goto free_msgs;
>+ }
>+
>+
>+ if (!write) {
>+ for (i = 0; i < num; i++) {
>+ unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
>+ struct eeprom_table_record *record = &records[i];
>+
>+ __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
>+ }
>+ }
>+
>+ if (write) {
>+ uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control);
>+
>+ /*
>+ * Update table header with size and CRC and account for table
>+ * wrap around where the assumption is that we treat it as empty
>+ * table
>+ *
>+ * TODO - Check the assumption is correct
>+ */
>+ control->num_recs += num;
>+ control->num_recs %= EEPROM_MAX_RECORD_NUM;
>+ control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num;
>+ if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES)
>+ control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE +
>+ control->num_recs * EEPROM_TABLE_RECORD_SIZE;
>+
>+ __update_tbl_checksum(control, records, num, old_hdr_byte_sum);
>+
>+ __update_table_header(control, buffs);
>+ } else if (!__validate_tbl_checksum(control, records, num)) {
>+ DRM_WARN("EEPROM Table checksum mismatch!");
>+ /* TODO Uncomment when EEPROM read/write is reliable */
>+ /* ret = -EIO; */
>+ }
>+
>+free_msgs:
>+ kfree(msgs);
>+
>+free_buff:
>+ kfree(buffs);
>+
>+ mutex_unlock(&control->tbl_mutex);
>+
>+ return ret == num ? 
0 : -EIO; >+} >+ >+/* Used for testing if bugs encountered */ >+#if 0 >+void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control) >+{ >+ int i; >+ struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL); >+ >+ if (!recs) >+ return; >+ >+ for (i = 0; i < 1 ; i++) { >+ recs[i].address = 0xdeadbeef; >+ recs[i].retired_page = i; >+ } >+ >+ if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) { >+ >+ memset(recs, 0, sizeof(*recs) * 1); >+ >+ control->next_addr = EEPROM_RECORD_START; >+ >+ if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) { >+ for (i = 0; i < 1; i++) >+ DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu", >+ recs[i].address, recs[i].retired_page); >+ } else >+ DRM_ERROR("Failed in reading from table"); >+ >+ } else >+ DRM_ERROR("Failed in writing to table"); >+} >+#endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h 2019-08-31 15:01:11.843736167 -0500 >@@ -0,0 +1,90 @@ >+/* >+ * Copyright 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. >+ * >+ */ >+ >+#ifndef _AMDGPU_RAS_EEPROM_H >+#define _AMDGPU_RAS_EEPROM_H >+ >+#include <linux/i2c.h> >+ >+struct amdgpu_device; >+ >+enum amdgpu_ras_eeprom_err_type{ >+ AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER, >+ AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, >+ AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE >+}; >+ >+struct amdgpu_ras_eeprom_table_header { >+ uint32_t header; >+ uint32_t version; >+ uint32_t first_rec_offset; >+ uint32_t tbl_size; >+ uint32_t checksum; >+}__attribute__((__packed__)); >+ >+struct amdgpu_ras_eeprom_control { >+ struct amdgpu_ras_eeprom_table_header tbl_hdr; >+ struct i2c_adapter eeprom_accessor; >+ uint32_t next_addr; >+ unsigned int num_recs; >+ struct mutex tbl_mutex; >+ bool bus_locked; >+ uint32_t tbl_byte_sum; >+}; >+ >+/* >+ * Represents single table record. Packed to be easily serialized into byte >+ * stream. 
>+ */ >+struct eeprom_table_record { >+ >+ union { >+ uint64_t address; >+ uint64_t offset; >+ }; >+ >+ uint64_t retired_page; >+ uint64_t ts; >+ >+ enum amdgpu_ras_eeprom_err_type err_type; >+ >+ union { >+ unsigned char bank; >+ unsigned char cu; >+ }; >+ >+ unsigned char mem_channel; >+ unsigned char mcumc_id; >+}__attribute__((__packed__)); >+ >+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); >+void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control); >+ >+int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, >+ struct eeprom_table_record *records, >+ bool write, >+ int num); >+ >+void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control); >+ >+#endif // _AMDGPU_RAS_EEPROM_H >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 2019-08-31 15:01:11.843736167 -0500 >@@ -29,6 +29,7 @@ > #include "amdgpu.h" > #include "amdgpu_psp.h" > #include "ta_ras_if.h" >+#include "amdgpu_ras_eeprom.h" > > enum amdgpu_ras_block { > AMDGPU_RAS_BLOCK__UMC = 0, >@@ -52,6 +53,236 @@ > #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST > #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) > >+enum amdgpu_ras_gfx_subblock { >+ /* CPC */ >+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, >+ AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = >+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, >+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, >+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, >+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, >+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, >+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, >+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, >+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = >+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, >+ /* CPF */ >+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = >+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, >+ AMDGPU_RAS_BLOCK__GFX_CPF_TAG, >+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, >+ /* CPG */ >+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = >+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, >+ AMDGPU_RAS_BLOCK__GFX_CPG_TAG, >+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, >+ /* GDS */ >+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, >+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, >+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, >+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, >+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = >+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, >+ /* SPI */ >+ AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, >+ /* SQ */ >+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, >+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, >+ AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, >+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, >+ /* SQC (3 ranges) */ >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, >+ /* SQC range 0 */ >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, >+ 
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, >+ /* SQC range 1 */ >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, >+ /* SQC range 2 */ >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = >+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = >+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, >+ /* TA */ >+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = >+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, >+ AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, >+ AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, >+ AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, >+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, >+ /* TCA */ >+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = >+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = >+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, >+ /* TCC (5 sub-ranges) */ >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, >+ /* TCC range 0 */ >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, >+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, >+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, >+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, >+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, >+ AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, >+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = >+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, >+ /* TCC range 1 */ >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = >+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, >+ /* TCC range 2 */ >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, >+ 
AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, >+ AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, >+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, >+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, >+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = >+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, >+ /* TCC range 3 */ >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = >+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, >+ /* TCC range 4 */ >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, >+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = >+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = >+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, >+ /* TCI */ >+ AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, >+ /* TCP */ >+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = >+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, >+ AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, >+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, >+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, >+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = >+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, >+ /* TD */ >+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = >+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, >+ AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, >+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, >+ /* EA (3 sub-ranges) */ >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, >+ /* EA range 0 */ >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, >+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, >+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = >+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, >+ /* EA range 1 */ >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, >+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, >+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = >+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, >+ /* EA range 2 */ >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, >+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, >+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = >+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = >+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, >+ /* UTC VM L2 bank */ >+ AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, >+ /* UTC VM walker */ >+ 
AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, >+ /* UTC ATC L2 2MB cache */ >+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, >+ /* UTC ATC L2 4KB cache */ >+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, >+ AMDGPU_RAS_BLOCK__GFX_MAX >+}; >+ > enum amdgpu_ras_error_type { > AMDGPU_RAS_ERROR__NONE = 0, > AMDGPU_RAS_ERROR__PARITY = 1, >@@ -76,9 +307,6 @@ > char name[32]; > }; > >-typedef int (*ras_ih_cb)(struct amdgpu_device *adev, >- struct amdgpu_iv_entry *entry); >- > struct amdgpu_ras { > /* ras infrastructure */ > /* for ras itself. */ >@@ -106,10 +334,85 @@ > struct mutex recovery_lock; > > uint32_t flags; >+ >+ struct amdgpu_ras_eeprom_control eeprom_control; > }; > >-/* interfaces for IP */ >+struct ras_fs_data { >+ char sysfs_name[32]; >+ char debugfs_name[32]; >+}; >+ >+struct ras_err_data { >+ unsigned long ue_count; >+ unsigned long ce_count; >+ unsigned long err_addr_cnt; >+ uint64_t *err_addr; >+}; > >+struct ras_err_handler_data { >+ /* point to bad pages array */ >+ struct { >+ unsigned long bp; >+ struct amdgpu_bo *bo; >+ } *bps; >+ /* the count of entries */ >+ int count; >+ /* the space can place new entries */ >+ int space_left; >+ /* last reserved entry's index + 1 */ >+ int last_reserved; >+}; >+ >+typedef int (*ras_ih_cb)(struct amdgpu_device *adev, >+ struct ras_err_data *err_data, >+ struct amdgpu_iv_entry *entry); >+ >+struct ras_ih_data { >+ /* interrupt bottom half */ >+ struct work_struct ih_work; >+ int inuse; >+ /* IP callback */ >+ ras_ih_cb cb; >+ /* full of entries */ >+ unsigned char *ring; >+ unsigned int ring_size; >+ unsigned int element_size; >+ unsigned int aligned_element_size; >+ unsigned int rptr; >+ unsigned int wptr; >+}; >+ >+struct ras_manager { >+ struct ras_common_if head; >+ /* reference count */ >+ int use; >+ /* ras block link */ >+ struct list_head node; >+ /* the device */ >+ struct amdgpu_device *adev; >+ /* debugfs */ >+ struct dentry *ent; >+ /* sysfs */ >+ struct device_attribute sysfs_attr; >+ int attr_inuse; >+ >+ /* fs node name */ >+ struct ras_fs_data fs_data; >+ >+ /* IH data */ >+ struct ras_ih_data ih_data; >+ >+ struct ras_err_data err_data; >+}; >+ >+struct ras_badpage { >+ unsigned int bp; >+ unsigned int size; >+ unsigned int flags; >+}; >+ >+/* interfaces for IP */ > struct ras_fs_if { > struct ras_common_if head; > char sysfs_name[32]; >@@ -184,7 +487,7 @@ > void amdgpu_ras_resume(struct amdgpu_device *adev); > void amdgpu_ras_suspend(struct amdgpu_device *adev); > >-int amdgpu_ras_query_error_count(struct amdgpu_device *adev, >+unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, > bool is_ce); > > /* error handling functions */ >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 2019-08-31 15:01:11.843736167 -0500 >@@ -29,7 +29,7 @@ > #include <drm/drm_print.h> > > /* max number of rings */ >-#define AMDGPU_MAX_RINGS 24 >+#define AMDGPU_MAX_RINGS 28 > #define AMDGPU_MAX_GFX_RINGS 2 > #define AMDGPU_MAX_COMPUTE_RINGS 8 > #define AMDGPU_MAX_VCE_RINGS 3 >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 2019-08-31 15:01:11.843736167 -0500 >@@ 
-25,11 +25,17 @@ > #define __AMDGPU_SDMA_H__ > > /* max number of IP instances */ >-#define AMDGPU_MAX_SDMA_INSTANCES 2 >+#define AMDGPU_MAX_SDMA_INSTANCES 8 > > enum amdgpu_sdma_irq { > AMDGPU_SDMA_IRQ_INSTANCE0 = 0, > AMDGPU_SDMA_IRQ_INSTANCE1, >+ AMDGPU_SDMA_IRQ_INSTANCE2, >+ AMDGPU_SDMA_IRQ_INSTANCE3, >+ AMDGPU_SDMA_IRQ_INSTANCE4, >+ AMDGPU_SDMA_IRQ_INSTANCE5, >+ AMDGPU_SDMA_IRQ_INSTANCE6, >+ AMDGPU_SDMA_IRQ_INSTANCE7, > AMDGPU_SDMA_IRQ_LAST > }; > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 2019-08-31 15:01:11.844736167 -0500 >@@ -227,7 +227,7 @@ > > if (amdgpu_ttm_tt_get_usermm(bo->ttm)) > return -EPERM; >- return drm_vma_node_verify_access(&abo->gem_base.vma_node, >+ return drm_vma_node_verify_access(&abo->tbo.base.vma_node, > filp->private_data); > } > >@@ -440,10 +440,26 @@ > > r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, > new_mem->num_pages << PAGE_SHIFT, >- bo->resv, &fence); >+ bo->base.resv, &fence); > if (r) > goto error; > >+ /* clear the space being freed */ >+ if (old_mem->mem_type == TTM_PL_VRAM && >+ (ttm_to_amdgpu_bo(bo)->flags & >+ AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { >+ struct dma_fence *wipe_fence = NULL; >+ >+ r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, >+ NULL, &wipe_fence); >+ if (r) { >+ goto error; >+ } else if (wipe_fence) { >+ dma_fence_put(fence); >+ fence = wipe_fence; >+ } >+ } >+ > /* Always block for VM page tables before committing the new location */ > if (bo->type == ttm_bo_type_kernel) > r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem); >@@ -1478,18 +1494,18 @@ > * cleanly handle page faults. > */ > if (bo->type == ttm_bo_type_kernel && >- !reservation_object_test_signaled_rcu(bo->resv, true)) >+ !reservation_object_test_signaled_rcu(bo->base.resv, true)) > return false; > > /* If bo is a KFD BO, check if the bo belongs to the current process. 
> * If true, then return false as any KFD process needs all its BOs to > * be resident to run successfully > */ >- flist = reservation_object_get_list(bo->resv); >+ flist = reservation_object_get_list(bo->base.resv); > if (flist) { > for (i = 0; i < flist->shared_count; ++i) { > f = rcu_dereference_protected(flist->shared[i], >- reservation_object_held(bo->resv)); >+ reservation_object_held(bo->base.resv)); > if (amdkfd_fence_check_mm(f, current->mm)) > return false; > } >@@ -1599,6 +1615,7 @@ > .move = &amdgpu_bo_move, > .verify_access = &amdgpu_verify_access, > .move_notify = &amdgpu_bo_move_notify, >+ .release_notify = &amdgpu_bo_release_notify, > .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, > .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, > .io_mem_free = &amdgpu_ttm_io_mem_free, >@@ -1721,6 +1738,7 @@ > uint64_t gtt_size; > int r; > u64 vis_vram_limit; >+ void *stolen_vga_buf; > > mutex_init(&adev->mman.gtt_window_lock); > >@@ -1728,7 +1746,7 @@ > r = ttm_bo_device_init(&adev->mman.bdev, > &amdgpu_bo_driver, > adev->ddev->anon_inode->i_mapping, >- adev->need_dma32); >+ dma_addressing_limited(adev->dev)); > if (r) { > DRM_ERROR("failed initializing buffer object driver(%d).\n", r); > return r; >@@ -1775,7 +1793,7 @@ > r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, > AMDGPU_GEM_DOMAIN_VRAM, > &adev->stolen_vga_memory, >- NULL, NULL); >+ NULL, &stolen_vga_buf); > if (r) > return r; > DRM_INFO("amdgpu: %uM of VRAM memory ready\n", >@@ -1839,8 +1857,9 @@ > */ > void amdgpu_ttm_late_init(struct amdgpu_device *adev) > { >+ void *stolen_vga_buf; > /* return the VGA stolen memory (if any) back to VRAM */ >- amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); >+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); > } > > /** >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 2019-08-31 15:01:11.844736167 -0500 >@@ -38,6 +38,8 @@ > #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 > #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 > >+#define AMDGPU_POISON 0xd0bed0be >+ > struct amdgpu_mman { > struct ttm_bo_device bdev; > bool mem_global_referenced; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 2019-08-31 15:01:11.844736167 -0500 >@@ -83,8 +83,8 @@ > const struct smc_firmware_header_v2_0 *v2_hdr = > container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); > >- DRM_INFO("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); >- DRM_INFO("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); >+ DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); >+ DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); > } else { > DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor); > } >@@ -269,6 +269,16 @@ > DRM_DEBUG("kdb_size_bytes: %u\n", > le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes)); > } >+ if (version_minor == 2) { >+ const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 = >+ container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0); >+ DRM_DEBUG("kdb_header_version: %u\n", >+ 
le32_to_cpu(psp_hdr_v1_2->kdb_header_version)); >+ DRM_DEBUG("kdb_offset_bytes: %u\n", >+ le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes)); >+ DRM_DEBUG("kdb_size_bytes: %u\n", >+ le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes)); >+ } > } else { > DRM_ERROR("Unknown PSP ucode version: %u.%u\n", > version_major, version_minor); >@@ -350,11 +360,17 @@ > case CHIP_RAVEN: > case CHIP_VEGA12: > case CHIP_VEGA20: >+ case CHIP_RENOIR: > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > if (!load_type) > return AMDGPU_FW_LOAD_DIRECT; > else > return AMDGPU_FW_LOAD_PSP; >+ case CHIP_ARCTURUS: >+ return AMDGPU_FW_LOAD_DIRECT; >+ > default: > DRM_ERROR("Unknown firmware load type\n"); > } >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 2019-08-31 15:01:11.844736167 -0500 >@@ -90,6 +90,15 @@ > uint32_t kdb_size_bytes; > }; > >+/* version_major=1, version_minor=2 */ >+struct psp_firmware_header_v1_2 { >+ struct psp_firmware_header_v1_0 v1_0; >+ uint32_t reserve[3]; >+ uint32_t kdb_header_version; >+ uint32_t kdb_offset_bytes; >+ uint32_t kdb_size_bytes; >+}; >+ > /* version_major=1, version_minor=0 */ > struct ta_firmware_header_v1_0 { > struct common_firmware_header header; >@@ -262,6 +271,12 @@ > enum AMDGPU_UCODE_ID { > AMDGPU_UCODE_ID_SDMA0 = 0, > AMDGPU_UCODE_ID_SDMA1, >+ AMDGPU_UCODE_ID_SDMA2, >+ AMDGPU_UCODE_ID_SDMA3, >+ AMDGPU_UCODE_ID_SDMA4, >+ AMDGPU_UCODE_ID_SDMA5, >+ AMDGPU_UCODE_ID_SDMA6, >+ AMDGPU_UCODE_ID_SDMA7, > AMDGPU_UCODE_ID_CP_CE, > AMDGPU_UCODE_ID_CP_PFP, > AMDGPU_UCODE_ID_CP_ME, >@@ -281,6 +296,7 @@ > AMDGPU_UCODE_ID_UVD1, > AMDGPU_UCODE_ID_VCE, > AMDGPU_UCODE_ID_VCN, >+ AMDGPU_UCODE_ID_VCN1, > AMDGPU_UCODE_ID_DMCU_ERAM, > AMDGPU_UCODE_ID_DMCU_INTV, > AMDGPU_UCODE_ID_VCN0_RAM, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 2019-08-31 15:01:11.844736167 -0500 >@@ -0,0 +1,82 @@ >+/* >+ * Copyright (C) 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included >+ * in all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN >+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
>+ */
>+#ifndef __AMDGPU_UMC_H__
>+#define __AMDGPU_UMC_H__
>+
>+/* implement 64 bits REG operations via 32 bits interface */
>+#define RREG64_UMC(reg) (RREG32(reg) | \
>+ ((uint64_t)RREG32((reg) + 1) << 32))
>+#define WREG64_UMC(reg, v) \
>+ do { \
>+ WREG32((reg), lower_32_bits(v)); \
>+ WREG32((reg) + 1, upper_32_bits(v)); \
>+ } while (0)
>+
>+/*
>+ * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data,
>+ * uint32_t umc_reg_offset, uint32_t channel_index)
>+ */
>+#define amdgpu_umc_for_each_channel(func) \
>+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; \
>+ uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index; \
>+ for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { \
>+ /* enable the index mode to query error count per channel */ \
>+ adev->umc.funcs->enable_umc_index_mode(adev, umc_inst); \
>+ for (channel_inst = 0; \
>+ channel_inst < adev->umc.channel_inst_num; \
>+ channel_inst++) { \
>+ /* calc the register offset according to channel instance */ \
>+ umc_reg_offset = adev->umc.channel_offs * channel_inst; \
>+ /* get channel index of interleaved memory */ \
>+ channel_index = adev->umc.channel_idx_tbl[ \
>+ umc_inst * adev->umc.channel_inst_num + channel_inst]; \
>+ (func)(adev, err_data, umc_reg_offset, channel_index); \
>+ } \
>+ } \
>+ adev->umc.funcs->disable_umc_index_mode(adev);
>+
>+struct amdgpu_umc_funcs {
>+ void (*ras_init)(struct amdgpu_device *adev);
>+ void (*query_ras_error_count)(struct amdgpu_device *adev,
>+ void *ras_error_status);
>+ void (*query_ras_error_address)(struct amdgpu_device *adev,
>+ void *ras_error_status);
>+ void (*enable_umc_index_mode)(struct amdgpu_device *adev,
>+ uint32_t umc_instance);
>+ void (*disable_umc_index_mode)(struct amdgpu_device *adev);
>+};
>+
>+struct amdgpu_umc {
>+ /* max error count in one ras query call */
>+ uint32_t max_ras_err_cnt_per_query;
>+ /* number of umc channel instance with memory map register access */
>+ uint32_t channel_inst_num;
>+ /* number of umc instance with memory map register access */
>+ uint32_t umc_inst_num;
>+ /* UMC register per channel offset */
>+ uint32_t channel_offs;
>+ /* channel index table of interleaved memory */
>+ const uint32_t *channel_idx_tbl;
>+
>+ const struct amdgpu_umc_funcs *funcs;
>+};
>+
>+#endif
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 2019-08-31 15:01:11.844736167 -0500
>@@ -1073,7 +1073,7 @@
> ib->length_dw = 16;
>
> if (direct) {
>- r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
>+ r = reservation_object_wait_timeout_rcu(bo->tbo.base.resv,
> true, false,
> msecs_to_jiffies(10));
> if (r == 0)
>@@ -1085,7 +1085,7 @@
> if (r)
> goto err_free;
> } else {
>- r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
>+ r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
> AMDGPU_FENCE_OWNER_UNDEFINED, false);
> if (r)
> goto err_free;
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 2019-08-31 15:01:11.844736167 -0500
>@@ -46,12 +46,20 @@
> #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
> #define FIRMWARE_PICASSO 
"amdgpu/picasso_vcn.bin" > #define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin" >+#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin" >+#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin" > #define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" >+#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" >+#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" > > MODULE_FIRMWARE(FIRMWARE_RAVEN); > MODULE_FIRMWARE(FIRMWARE_PICASSO); > MODULE_FIRMWARE(FIRMWARE_RAVEN2); >+MODULE_FIRMWARE(FIRMWARE_ARCTURUS); >+MODULE_FIRMWARE(FIRMWARE_RENOIR); > MODULE_FIRMWARE(FIRMWARE_NAVI10); >+MODULE_FIRMWARE(FIRMWARE_NAVI14); >+MODULE_FIRMWARE(FIRMWARE_NAVI12); > > static void amdgpu_vcn_idle_work_handler(struct work_struct *work); > >@@ -61,7 +69,7 @@ > const char *fw_name; > const struct common_firmware_header *hdr; > unsigned char fw_check; >- int r; >+ int i, r; > > INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); > >@@ -74,12 +82,33 @@ > else > fw_name = FIRMWARE_RAVEN; > break; >+ case CHIP_ARCTURUS: >+ fw_name = FIRMWARE_ARCTURUS; >+ break; >+ case CHIP_RENOIR: >+ fw_name = FIRMWARE_RENOIR; >+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && >+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) >+ adev->vcn.indirect_sram = true; >+ break; > case CHIP_NAVI10: > fw_name = FIRMWARE_NAVI10; > if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && > (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) > adev->vcn.indirect_sram = true; > break; >+ case CHIP_NAVI14: >+ fw_name = FIRMWARE_NAVI14; >+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && >+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) >+ adev->vcn.indirect_sram = true; >+ break; >+ case CHIP_NAVI12: >+ fw_name = FIRMWARE_NAVI12; >+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && >+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) >+ adev->vcn.indirect_sram = true; >+ break; > default: > return -EINVAL; > } >@@ -133,12 +162,18 @@ > bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; > if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) > bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); >- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, >- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, >- &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); >- if (r) { >- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); >- return r; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ >+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, >+ AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo, >+ &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr); >+ if (r) { >+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); >+ return r; >+ } > } > > if (adev->vcn.indirect_sram) { >@@ -156,26 +191,30 @@ > > int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) > { >- int i; >- >- kvfree(adev->vcn.saved_bo); >+ int i, j; > > if (adev->vcn.indirect_sram) { > amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, >- &adev->vcn.dpg_sram_gpu_addr, >- (void **)&adev->vcn.dpg_sram_cpu_addr); >+ &adev->vcn.dpg_sram_gpu_addr, >+ (void **)&adev->vcn.dpg_sram_cpu_addr); > } > >- amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, >- &adev->vcn.gpu_addr, >- (void **)&adev->vcn.cpu_addr); >+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { >+ if (adev->vcn.harvest_config & (1 << j)) >+ continue; >+ kvfree(adev->vcn.inst[j].saved_bo); > >- amdgpu_ring_fini(&adev->vcn.ring_dec); >+ amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, >+ &adev->vcn.inst[j].gpu_addr, >+ (void **)&adev->vcn.inst[j].cpu_addr); > >- for (i = 0; 
i < adev->vcn.num_enc_rings; ++i) >- amdgpu_ring_fini(&adev->vcn.ring_enc[i]); >+ amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); > >- amdgpu_ring_fini(&adev->vcn.ring_jpeg); >+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) >+ amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); >+ >+ amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg); >+ } > > release_firmware(adev->vcn.fw); > >@@ -186,21 +225,25 @@ > { > unsigned size; > void *ptr; >+ int i; > > cancel_delayed_work_sync(&adev->vcn.idle_work); > >- if (adev->vcn.vcpu_bo == NULL) >- return 0; >- >- size = amdgpu_bo_size(adev->vcn.vcpu_bo); >- ptr = adev->vcn.cpu_addr; >- >- adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL); >- if (!adev->vcn.saved_bo) >- return -ENOMEM; >- >- memcpy_fromio(adev->vcn.saved_bo, ptr, size); >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ if (adev->vcn.inst[i].vcpu_bo == NULL) >+ return 0; >+ >+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); >+ ptr = adev->vcn.inst[i].cpu_addr; >+ >+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); >+ if (!adev->vcn.inst[i].saved_bo) >+ return -ENOMEM; > >+ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); >+ } > return 0; > } > >@@ -208,32 +251,36 @@ > { > unsigned size; > void *ptr; >+ int i; > >- if (adev->vcn.vcpu_bo == NULL) >- return -EINVAL; >- >- size = amdgpu_bo_size(adev->vcn.vcpu_bo); >- ptr = adev->vcn.cpu_addr; >- >- if (adev->vcn.saved_bo != NULL) { >- memcpy_toio(ptr, adev->vcn.saved_bo, size); >- kvfree(adev->vcn.saved_bo); >- adev->vcn.saved_bo = NULL; >- } else { >- const struct common_firmware_header *hdr; >- unsigned offset; >- >- hdr = (const struct common_firmware_header *)adev->vcn.fw->data; >- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { >- offset = le32_to_cpu(hdr->ucode_array_offset_bytes); >- memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, >- le32_to_cpu(hdr->ucode_size_bytes)); >- size -= le32_to_cpu(hdr->ucode_size_bytes); >- ptr += le32_to_cpu(hdr->ucode_size_bytes); >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ if (adev->vcn.inst[i].vcpu_bo == NULL) >+ return -EINVAL; >+ >+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); >+ ptr = adev->vcn.inst[i].cpu_addr; >+ >+ if (adev->vcn.inst[i].saved_bo != NULL) { >+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); >+ kvfree(adev->vcn.inst[i].saved_bo); >+ adev->vcn.inst[i].saved_bo = NULL; >+ } else { >+ const struct common_firmware_header *hdr; >+ unsigned offset; >+ >+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data; >+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { >+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes); >+ memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, >+ le32_to_cpu(hdr->ucode_size_bytes)); >+ size -= le32_to_cpu(hdr->ucode_size_bytes); >+ ptr += le32_to_cpu(hdr->ucode_size_bytes); >+ } >+ memset_io(ptr, 0, size); > } >- memset_io(ptr, 0, size); > } >- > return 0; > } > >@@ -241,35 +288,40 @@ > { > struct amdgpu_device *adev = > container_of(work, struct amdgpu_device, vcn.idle_work.work); >- unsigned int fences = 0; >- unsigned int i; >+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; >+ unsigned int i, j; > >- for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >- fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); >- } >+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { >+ if (adev->vcn.harvest_config & (1 << j)) >+ continue; >+ for (i = 0; i < 
adev->vcn.num_enc_rings; ++i) { >+ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); >+ } > >- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { >- struct dpg_pause_state new_state; >+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { >+ struct dpg_pause_state new_state; > >- if (fences) >- new_state.fw_based = VCN_DPG_STATE__PAUSE; >- else >- new_state.fw_based = VCN_DPG_STATE__UNPAUSE; >+ if (fence[j]) >+ new_state.fw_based = VCN_DPG_STATE__PAUSE; >+ else >+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE; >+ >+ if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg)) >+ new_state.jpeg = VCN_DPG_STATE__PAUSE; >+ else >+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE; > >- if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) >- new_state.jpeg = VCN_DPG_STATE__PAUSE; >- else >- new_state.jpeg = VCN_DPG_STATE__UNPAUSE; >+ adev->vcn.pause_dpg_mode(adev, &new_state); >+ } > >- adev->vcn.pause_dpg_mode(adev, &new_state); >+ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg); >+ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); >+ fences += fence[j]; > } > >- fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); >- fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec); >- > if (fences == 0) { > amdgpu_gfx_off_ctrl(adev, true); >- if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) >+ if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) > amdgpu_dpm_enable_uvd(adev, false); > else > amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, >@@ -286,7 +338,7 @@ > > if (set_clocks) { > amdgpu_gfx_off_ctrl(adev, false); >- if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) >+ if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) > amdgpu_dpm_enable_uvd(adev, true); > else > amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, >@@ -299,14 +351,14 @@ > unsigned int i; > > for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >- fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); >+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); > } > if (fences) > new_state.fw_based = VCN_DPG_STATE__PAUSE; > else > new_state.fw_based = VCN_DPG_STATE__UNPAUSE; > >- if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) >+ if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg)) > new_state.jpeg = VCN_DPG_STATE__PAUSE; > else > new_state.jpeg = VCN_DPG_STATE__UNPAUSE; >@@ -332,7 +384,7 @@ > unsigned i; > int r; > >- WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); >+ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); > r = amdgpu_ring_alloc(ring, 3); > if (r) > return r; >@@ -340,7 +392,7 @@ > amdgpu_ring_write(ring, 0xDEADBEEF); > amdgpu_ring_commit(ring); > for (i = 0; i < adev->usec_timeout; i++) { >- tmp = RREG32(adev->vcn.external.scratch9); >+ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); > if (tmp == 0xDEADBEEF) > break; > udelay(1); >@@ -651,7 +703,7 @@ > unsigned i; > int r; > >- WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD); >+ WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); > r = amdgpu_ring_alloc(ring, 3); > if (r) > return r; >@@ -661,7 +713,7 @@ > amdgpu_ring_commit(ring); > > for (i = 0; i < adev->usec_timeout; i++) { >- tmp = RREG32(adev->vcn.external.jpeg_pitch); >+ tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); > if (tmp == 0xDEADBEEF) > break; > udelay(1); >@@ -735,7 +787,7 @@ > } > > for (i = 0; i < adev->usec_timeout; i++) { >- tmp = RREG32(adev->vcn.external.jpeg_pitch); >+ tmp = 
RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); > if (tmp == 0xDEADBEEF) > break; > udelay(1); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 2019-08-31 15:09:42.578781293 -0500 >@@ -30,6 +30,11 @@ > #define AMDGPU_VCN_FIRMWARE_OFFSET 256 > #define AMDGPU_VCN_MAX_ENC_RINGS 3 > >+#define AMDGPU_MAX_VCN_INSTANCES 2 >+ >+#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) >+#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) >+ > #define VCN_DEC_KMD_CMD 0x80000000 > #define VCN_DEC_CMD_FENCE 0x00000000 > #define VCN_DEC_CMD_TRAP 0x00000001 >@@ -146,34 +151,49 @@ > unsigned data1; > unsigned cmd; > unsigned nop; >+ unsigned context_id; >+ unsigned ib_vmid; >+ unsigned ib_bar_low; >+ unsigned ib_bar_high; >+ unsigned ib_size; >+ unsigned gp_scratch8; > unsigned scratch9; > unsigned jpeg_pitch; > }; > >-struct amdgpu_vcn { >+struct amdgpu_vcn_inst { > struct amdgpu_bo *vcpu_bo; > void *cpu_addr; > uint64_t gpu_addr; >- unsigned fw_version; > void *saved_bo; >- struct delayed_work idle_work; >- const struct firmware *fw; /* VCN firmware */ > struct amdgpu_ring ring_dec; > struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; > struct amdgpu_ring ring_jpeg; > struct amdgpu_irq_src irq; >+ struct amdgpu_vcn_reg external; >+}; >+ >+struct amdgpu_vcn { >+ unsigned fw_version; >+ struct delayed_work idle_work; >+ const struct firmware *fw; /* VCN firmware */ > unsigned num_enc_rings; > enum amd_powergating_state cur_state; > struct dpg_pause_state pause_state; >- struct amdgpu_vcn_reg internal, external; >- int (*pause_dpg_mode)(struct amdgpu_device *adev, >- struct dpg_pause_state *new_state); > > bool indirect_sram; > struct amdgpu_bo *dpg_sram_bo; > void *dpg_sram_cpu_addr; > uint64_t dpg_sram_gpu_addr; > uint32_t *dpg_sram_curr_addr; >+ >+ uint8_t num_vcn_inst; >+ struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; >+ struct amdgpu_vcn_reg internal; >+ >+ unsigned harvest_config; >+ int (*pause_dpg_mode)(struct amdgpu_device *adev, >+ struct dpg_pause_state *new_state); > }; > > int amdgpu_vcn_sw_init(struct amdgpu_device *adev); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 2019-08-31 15:01:11.844736167 -0500 >@@ -430,48 +430,3 @@ > > return clk; > } >- >-void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) >-{ >- struct amdgpu_virt *virt = &adev->virt; >- >- if (virt->ops && virt->ops->init_reg_access_mode) >- virt->ops->init_reg_access_mode(adev); >-} >- >-bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) >-{ >- bool ret = false; >- struct amdgpu_virt *virt = &adev->virt; >- >- if (amdgpu_sriov_vf(adev) >- && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) >- ret = true; >- >- return ret; >-} >- >-bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) >-{ >- bool ret = false; >- struct amdgpu_virt *virt = &adev->virt; >- >- if (amdgpu_sriov_vf(adev) >- && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) >- && !(amdgpu_sriov_runtime(adev))) >- ret = true; >- >- return ret; >-} >- >-bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) >-{ >- bool ret = 
false; >- struct amdgpu_virt *virt = &adev->virt; >- >- if (amdgpu_sriov_vf(adev) >- && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) >- ret = true; >- >- return ret; >-} >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 2019-08-31 15:01:11.844736167 -0500 >@@ -48,12 +48,6 @@ > uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; > }; > >-/* According to the fw feature, some new reg access modes are supported */ >-#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */ >-#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */ >-#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */ >-#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */ >- > /** > * struct amdgpu_virt_ops - amdgpu device virt operations > */ >@@ -65,7 +59,6 @@ > void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); > int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); > int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); >- void (*init_reg_access_mode)(struct amdgpu_device *adev); > }; > > /* >@@ -315,10 +308,4 @@ > void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); > uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); > uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); >- >-void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); >-bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); >-bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); >-bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev); >- > #endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 2019-08-31 15:01:11.845736167 -0500 >@@ -302,7 +302,7 @@ > base->next = bo->vm_bo; > bo->vm_bo = base; > >- if (bo->tbo.resv != vm->root.base.bo->tbo.resv) >+ if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) > return; > > vm->bulk_moveable = false; >@@ -583,7 +583,7 @@ > for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { > struct amdgpu_vm *vm = bo_base->vm; > >- if (abo->tbo.resv == vm->root.base.bo->tbo.resv) >+ if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) > vm->bulk_moveable = false; > } > >@@ -834,7 +834,7 @@ > bp->flags |= AMDGPU_GEM_CREATE_SHADOW; > bp->type = ttm_bo_type_kernel; > if (vm->root.base.bo) >- bp->resv = vm->root.base.bo->tbo.resv; >+ bp->resv = vm->root.base.bo->tbo.base.resv; > } > > /** >@@ -1574,7 +1574,7 @@ > flags &= ~AMDGPU_PTE_EXECUTABLE; > flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; > >- if (adev->asic_type == CHIP_NAVI10) { >+ if (adev->asic_type >= CHIP_NAVI10) { > flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; > flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); > } else { >@@ -1702,7 +1702,7 @@ > ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); > pages_addr = ttm->dma_address; > } >- exclusive = reservation_object_get_excl(bo->tbo.resv); >+ exclusive = reservation_object_get_excl(bo->tbo.base.resv); > } > > if (bo) { >@@ -1712,7 +1712,7 @@ > flags = 0x0; > } > >- if (clear || (bo && bo->tbo.resv == 
vm->root.base.bo->tbo.resv)) >+ if (clear || (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)) > last_update = &vm->last_update; > else > last_update = &bo_va->last_pt_update; >@@ -1743,7 +1743,7 @@ > * the evicted list so that it gets validated again on the > * next command submission. > */ >- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { >+ if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { > uint32_t mem_type = bo->tbo.mem.mem_type; > > if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) >@@ -1879,7 +1879,7 @@ > */ > static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) > { >- struct reservation_object *resv = vm->root.base.bo->tbo.resv; >+ struct reservation_object *resv = vm->root.base.bo->tbo.base.resv; > struct dma_fence *excl, **shared; > unsigned i, shared_count; > int r; >@@ -1993,7 +1993,7 @@ > while (!list_empty(&vm->invalidated)) { > bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, > base.vm_status); >- resv = bo_va->base.bo->tbo.resv; >+ resv = bo_va->base.bo->tbo.base.resv; > spin_unlock(&vm->invalidated_lock); > > /* Try to reserve the BO to avoid clearing its ptes */ >@@ -2084,7 +2084,7 @@ > if (mapping->flags & AMDGPU_PTE_PRT) > amdgpu_vm_prt_get(adev); > >- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && >+ if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv && > !bo_va->base.moved) { > list_move(&bo_va->base.vm_status, &vm->moved); > } >@@ -2416,7 +2416,8 @@ > struct amdgpu_bo *bo; > > bo = mapping->bo_va->base.bo; >- if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket) >+ if (reservation_object_locking_ctx(bo->tbo.base.resv) != >+ ticket) > continue; > } > >@@ -2443,7 +2444,7 @@ > struct amdgpu_vm_bo_base **base; > > if (bo) { >- if (bo->tbo.resv == vm->root.base.bo->tbo.resv) >+ if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) > vm->bulk_moveable = false; > > for (base = &bo_va->base.bo->vm_bo; *base; >@@ -2507,7 +2508,7 @@ > for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { > struct amdgpu_vm *vm = bo_base->vm; > >- if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { >+ if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { > amdgpu_vm_bo_evicted(bo_base); > continue; > } >@@ -2518,7 +2519,7 @@ > > if (bo->tbo.type == ttm_bo_type_kernel) > amdgpu_vm_bo_relocated(bo_base); >- else if (bo->tbo.resv == vm->root.base.bo->tbo.resv) >+ else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) > amdgpu_vm_bo_moved(bo_base); > else > amdgpu_vm_bo_invalidated(bo_base); >@@ -2648,7 +2649,7 @@ > */ > long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) > { >- return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.resv, >+ return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, > true, true, timeout); > } > >@@ -2723,7 +2724,7 @@ > if (r) > goto error_free_root; > >- r = reservation_object_reserve_shared(root->tbo.resv, 1); >+ r = reservation_object_reserve_shared(root->tbo.base.resv, 1); > if (r) > goto error_unreserve; > >@@ -2862,6 +2863,13 @@ > WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), > "CPU update of VM recommended only for large BAR system\n"); > >+ if (vm->use_cpu_for_update) >+ vm->update_funcs = &amdgpu_vm_cpu_funcs; >+ else >+ vm->update_funcs = &amdgpu_vm_sdma_funcs; >+ dma_fence_put(vm->last_update); >+ vm->last_update = NULL; >+ > if (vm->pasid) { > unsigned long flags; > >@@ -3060,12 +3068,12 @@ > switch (args->in.op) { > case 
AMDGPU_VM_OP_RESERVE_VMID: > /* current, we only have requirement to reserve vmid from gfxhub */ >- r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); >+ r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); > if (r) > return r; > break; > case AMDGPU_VM_OP_UNRESERVE_VMID: >- amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); >+ amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); > break; > default: > return -EINVAL; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 2019-08-31 15:01:11.845736167 -0500 >@@ -90,7 +90,7 @@ > | AMDGPU_PTE_WRITEABLE \ > | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC)) > >-/* NAVI10 only */ >+/* gfx10 */ > #define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48) > #define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL) > >@@ -100,9 +100,10 @@ > #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 > > /* max number of VMHUB */ >-#define AMDGPU_MAX_VMHUBS 2 >-#define AMDGPU_GFXHUB 0 >-#define AMDGPU_MMHUB 1 >+#define AMDGPU_MAX_VMHUBS 3 >+#define AMDGPU_GFXHUB_0 0 >+#define AMDGPU_MMHUB_0 1 >+#define AMDGPU_MMHUB_1 2 > > /* hardcode that limit for now */ > #define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 2019-08-31 15:01:11.845736167 -0500 >@@ -72,7 +72,7 @@ > if (r) > return r; > >- r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv, >+ r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv, > owner, false); > if (r) > return r; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 2019-08-31 15:01:11.845736167 -0500 >@@ -25,7 +25,7 @@ > #include "amdgpu.h" > #include "amdgpu_xgmi.h" > #include "amdgpu_smu.h" >- >+#include "df/df_3_6_offset.h" > > static DEFINE_MUTEX(xgmi_mutex); > >@@ -131,9 +131,37 @@ > > } > >+#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) >+static ssize_t amdgpu_xgmi_show_error(struct device *dev, >+ struct device_attribute *attr, >+ char *buf) >+{ >+ struct drm_device *ddev = dev_get_drvdata(dev); >+ struct amdgpu_device *adev = ddev->dev_private; >+ uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in; >+ uint64_t fica_out; >+ unsigned int error_count = 0; >+ >+ ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); >+ ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); > >-static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); >+ fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); >+ if (fica_out != 0x1f) >+ pr_err("xGMI error counters not enabled!\n"); >+ >+ fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); >+ >+ if ((fica_out & 0xffff) == 2) >+ error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); > >+ adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); >+ >+ return snprintf(buf, PAGE_SIZE, "%d\n", error_count); >+} >+ >+ >+static DEVICE_ATTR(xgmi_device_id, S_IRUGO, 
amdgpu_xgmi_show_device_id, NULL); >+static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); > > static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, > struct amdgpu_hive_info *hive) >@@ -148,6 +176,12 @@ > return ret; > } > >+ /* Create xgmi error file */ >+ ret = device_create_file(adev->dev, &dev_attr_xgmi_error); >+ if (ret) >+ pr_err("failed to create xgmi_error\n"); >+ >+ > /* Create sysfs link to hive info folder on the first device */ > if (adev != hive->adev) { > ret = sysfs_create_link(&adev->dev->kobj, hive->kobj, >@@ -248,7 +282,7 @@ > > dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); > >- if (is_support_sw_smu(adev)) >+ if (is_support_sw_smu_xgmi(adev)) > ret = smu_set_xgmi_pstate(&adev->smu, pstate); > if (ret) > dev_err(adev->dev, >@@ -296,23 +330,28 @@ > struct amdgpu_xgmi *entry; > struct amdgpu_device *tmp_adev = NULL; > >- int count = 0, ret = -EINVAL; >+ int count = 0, ret = 0; > > if (!adev->gmc.xgmi.supported) > return 0; > >- ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); >- if (ret) { >- dev_err(adev->dev, >- "XGMI: Failed to get node id\n"); >- return ret; >- } >+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { >+ ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); >+ if (ret) { >+ dev_err(adev->dev, >+ "XGMI: Failed to get hive id\n"); >+ return ret; >+ } > >- ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); >- if (ret) { >- dev_err(adev->dev, >- "XGMI: Failed to get hive id\n"); >- return ret; >+ ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); >+ if (ret) { >+ dev_err(adev->dev, >+ "XGMI: Failed to get node id\n"); >+ return ret; >+ } >+ } else { >+ adev->gmc.xgmi.hive_id = 16; >+ adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16; > } > > hive = amdgpu_get_xgmi_hive(adev, 1); >@@ -332,29 +371,32 @@ > top_info->num_nodes = count; > hive->number_devices = count; > >- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { >- /* update node list for other device in the hive */ >- if (tmp_adev != adev) { >- top_info = &tmp_adev->psp.xgmi_context.top_info; >- top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id; >- top_info->num_nodes = count; >+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { >+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { >+ /* update node list for other device in the hive */ >+ if (tmp_adev != adev) { >+ top_info = &tmp_adev->psp.xgmi_context.top_info; >+ top_info->nodes[count - 1].node_id = >+ adev->gmc.xgmi.node_id; >+ top_info->num_nodes = count; >+ } >+ ret = amdgpu_xgmi_update_topology(hive, tmp_adev); >+ if (ret) >+ goto exit; > } >- ret = amdgpu_xgmi_update_topology(hive, tmp_adev); >- if (ret) >- goto exit; >- } > >- /* get latest topology info for each device from psp */ >- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { >- ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, >- &tmp_adev->psp.xgmi_context.top_info); >- if (ret) { >- dev_err(tmp_adev->dev, >- "XGMI: Get topology failure on device %llx, hive %llx, ret %d", >- tmp_adev->gmc.xgmi.node_id, >- tmp_adev->gmc.xgmi.hive_id, ret); >- /* To do : continue with some node failed or disable the whole hive */ >- goto exit; >+ /* get latest topology info for each device from psp */ >+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { >+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, >+ &tmp_adev->psp.xgmi_context.top_info); >+ if (ret) { >+ 
dev_err(tmp_adev->dev, >+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d", >+ tmp_adev->gmc.xgmi.node_id, >+ tmp_adev->gmc.xgmi.hive_id, ret); >+ /* To do : continue with some node failed or disable the whole hive */ >+ goto exit; >+ } > } > } > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c 2019-08-31 15:01:11.845736167 -0500 >@@ -0,0 +1,59 @@ >+/* >+ * Copyright 2018 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. 
>+ * >+ */ >+#include "amdgpu.h" >+#include "soc15.h" >+ >+#include "soc15_common.h" >+#include "soc15_hw_ip.h" >+#include "arct_ip_offset.h" >+ >+int arct_reg_base_init(struct amdgpu_device *adev) >+{ >+ /* HW has more IP blocks, only initialized the block needed by our driver */ >+ uint32_t i; >+ for (i = 0 ; i < MAX_INSTANCE ; ++i) { >+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); >+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); >+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); >+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); >+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); >+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); >+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); >+ adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); >+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); >+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); >+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); >+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); >+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i])); >+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i])); >+ adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i])); >+ adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i])); >+ adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i])); >+ adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i])); >+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); >+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); >+ } >+ return 0; >+} >+ >+ >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c 2019-08-31 15:01:11.845736167 -0500 >@@ -0,0 +1,103 @@ >+/* >+ * Copyright 2016 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. 
>+ * >+ */ >+#include "amdgpu.h" >+#include "athub_v1_0.h" >+ >+#include "athub/athub_1_0_offset.h" >+#include "athub/athub_1_0_sh_mask.h" >+#include "vega10_enum.h" >+ >+#include "soc15_common.h" >+ >+static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, >+ bool enable) >+{ >+ uint32_t def, data; >+ >+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); >+ >+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) >+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; >+ else >+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; >+ >+ if (def != data) >+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); >+} >+ >+static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, >+ bool enable) >+{ >+ uint32_t def, data; >+ >+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); >+ >+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && >+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) >+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; >+ else >+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; >+ >+ if(def != data) >+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); >+} >+ >+int athub_v1_0_set_clockgating(struct amdgpu_device *adev, >+ enum amd_clockgating_state state) >+{ >+ if (amdgpu_sriov_vf(adev)) >+ return 0; >+ >+ switch (adev->asic_type) { >+ case CHIP_VEGA10: >+ case CHIP_VEGA12: >+ case CHIP_VEGA20: >+ case CHIP_RAVEN: >+ athub_update_medium_grain_clock_gating(adev, >+ state == AMD_CG_STATE_GATE ? true : false); >+ athub_update_medium_grain_light_sleep(adev, >+ state == AMD_CG_STATE_GATE ? true : false); >+ break; >+ default: >+ break; >+ } >+ >+ return 0; >+} >+ >+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) >+{ >+ int data; >+ >+ if (amdgpu_sriov_vf(adev)) >+ *flags = 0; >+ >+ /* AMD_CG_SUPPORT_ATHUB_MGCG */ >+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); >+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) >+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; >+ >+ /* AMD_CG_SUPPORT_ATHUB_LS */ >+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) >+ *flags |= AMD_CG_SUPPORT_ATHUB_LS; >+} >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h 2019-08-31 15:01:11.845736167 -0500 >@@ -0,0 +1,30 @@ >+/* >+ * Copyright 2016 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. >+ * >+ */ >+#ifndef __ATHUB_V1_0_H__ >+#define __ATHUB_V1_0_H__ >+ >+int athub_v1_0_set_clockgating(struct amdgpu_device *adev, >+ enum amd_clockgating_state state); >+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); >+ >+#endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c 2019-08-31 15:01:11.845736167 -0500 >@@ -74,6 +74,8 @@ > > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > athub_v2_0_update_medium_grain_clock_gating(adev, > state == AMD_CG_STATE_GATE ? true : false); > athub_v2_0_update_medium_grain_light_sleep(adev, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/cik.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/cik.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/cik.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/cik.c 2019-08-31 15:01:11.845736167 -0500 >@@ -1291,6 +1291,12 @@ > return r; > } > >+static enum amd_reset_method >+cik_asic_reset_method(struct amdgpu_device *adev) >+{ >+ return AMD_RESET_METHOD_LEGACY; >+} >+ > static u32 cik_get_config_memsize(struct amdgpu_device *adev) > { > return RREG32(mmCONFIG_MEMSIZE); >@@ -1823,6 +1829,7 @@ > .read_bios_from_rom = &cik_read_bios_from_rom, > .read_register = &cik_read_register, > .reset = &cik_asic_reset, >+ .reset_method = &cik_asic_reset_method, > .set_vga_state = &cik_vga_set_state, > .get_xclk = &cik_get_xclk, > .set_uvd_clocks = &cik_set_uvd_clocks, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c 2019-08-31 15:01:11.846736167 -0500 >@@ -236,6 +236,7 @@ > int crtc_id, u64 crtc_base, bool async) > { > struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; >+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; > u32 tmp; > > /* flip at hsync for async, default is vsync */ >@@ -243,6 +244,9 @@ > tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, > GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 
1 : 0); > WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); >+ /* update pitch */ >+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, >+ fb->pitches[0] / fb->format->cpp[0]); > /* update the primary scanout address */ > WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, > upper_32_bits(crtc_base)); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c 2019-08-31 15:01:11.846736167 -0500 >@@ -254,6 +254,7 @@ > int crtc_id, u64 crtc_base, bool async) > { > struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; >+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; > u32 tmp; > > /* flip immediate for async, default is vsync */ >@@ -261,6 +262,9 @@ > tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, > GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0); > WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); >+ /* update pitch */ >+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, >+ fb->pitches[0] / fb->format->cpp[0]); > /* update the scanout addresses */ > WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, > upper_32_bits(crtc_base)); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 2019-08-31 15:01:11.846736167 -0500 >@@ -191,10 +191,14 @@ > int crtc_id, u64 crtc_base, bool async) > { > struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; >+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; > > /* flip at hsync for async, default is vsync */ > WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? > GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); >+ /* update pitch */ >+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, >+ fb->pitches[0] / fb->format->cpp[0]); > /* update the scanout addresses */ > WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, > upper_32_bits(crtc_base)); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 2019-08-31 15:01:11.846736167 -0500 >@@ -184,10 +184,14 @@ > int crtc_id, u64 crtc_base, bool async) > { > struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; >+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; > > /* flip at hsync for async, default is vsync */ > WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? 
> GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); >+ /* update pitch */ >+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, >+ fb->pitches[0] / fb->format->cpp[0]); > /* update the primary scanout addresses */ > WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, > upper_32_bits(crtc_base)); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_virtual.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_virtual.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_virtual.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_virtual.c 2019-08-31 15:01:11.846736167 -0500 >@@ -454,13 +454,8 @@ > #endif > /* no DCE */ > break; >- case CHIP_VEGA10: >- case CHIP_VEGA12: >- case CHIP_VEGA20: >- case CHIP_NAVI10: >- break; > default: >- DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); >+ break; > } > return 0; > } >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/df_v3_6.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/df_v3_6.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/df_v3_6.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/df_v3_6.c 2019-08-31 15:01:11.846736167 -0500 >@@ -93,6 +93,96 @@ > NULL > }; > >+static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, >+ uint32_t ficaa_val) >+{ >+ unsigned long flags, address, data; >+ uint32_t ficadl_val, ficadh_val; >+ >+ address = adev->nbio_funcs->get_pcie_index_offset(adev); >+ data = adev->nbio_funcs->get_pcie_data_offset(adev); >+ >+ spin_lock_irqsave(&adev->pcie_idx_lock, flags); >+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); >+ WREG32(data, ficaa_val); >+ >+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); >+ ficadl_val = RREG32(data); >+ >+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); >+ ficadh_val = RREG32(data); >+ >+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); >+ >+ return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val); >+} >+ >+static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, >+ uint32_t ficadl_val, uint32_t ficadh_val) >+{ >+ unsigned long flags, address, data; >+ >+ address = adev->nbio_funcs->get_pcie_index_offset(adev); >+ data = adev->nbio_funcs->get_pcie_data_offset(adev); >+ >+ spin_lock_irqsave(&adev->pcie_idx_lock, flags); >+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); >+ WREG32(data, ficaa_val); >+ >+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); >+ WREG32(data, ficadl_val); >+ >+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); >+ WREG32(data, ficadh_val); >+ >+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); >+} >+ >+/* >+ * df_v3_6_perfmon_rreg - read perfmon lo and hi >+ * >+ * required to be atomic. 
no mmio method provided so subsequent reads for lo >+ * and hi require to preserve df finite state machine >+ */ >+static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, >+ uint32_t lo_addr, uint32_t *lo_val, >+ uint32_t hi_addr, uint32_t *hi_val) >+{ >+ unsigned long flags, address, data; >+ >+ address = adev->nbio_funcs->get_pcie_index_offset(adev); >+ data = adev->nbio_funcs->get_pcie_data_offset(adev); >+ >+ spin_lock_irqsave(&adev->pcie_idx_lock, flags); >+ WREG32(address, lo_addr); >+ *lo_val = RREG32(data); >+ WREG32(address, hi_addr); >+ *hi_val = RREG32(data); >+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); >+} >+ >+/* >+ * df_v3_6_perfmon_wreg - write to perfmon lo and hi >+ * >+ * required to be atomic. no mmio method provided so subsequent reads after >+ * data writes cannot occur to preserve data fabrics finite state machine. >+ */ >+static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, >+ uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val) >+{ >+ unsigned long flags, address, data; >+ >+ address = adev->nbio_funcs->get_pcie_index_offset(adev); >+ data = adev->nbio_funcs->get_pcie_data_offset(adev); >+ >+ spin_lock_irqsave(&adev->pcie_idx_lock, flags); >+ WREG32(address, lo_addr); >+ WREG32(data, lo_val); >+ WREG32(address, hi_addr); >+ WREG32(data, hi_val); >+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); >+} >+ > /* get the number of df counters available */ > static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, > struct device_attribute *attr, >@@ -268,6 +358,10 @@ > uint32_t *lo_val, > uint32_t *hi_val) > { >+ >+ uint32_t eventsel, instance, unitmask; >+ uint32_t instance_10, instance_5432, instance_76; >+ > df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); > > if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { >@@ -276,40 +370,33 @@ > return -ENXIO; > } > >- if (lo_val && hi_val) { >- uint32_t eventsel, instance, unitmask; >- uint32_t instance_10, instance_5432, instance_76; >- >- eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; >- unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; >- instance = DF_V3_6_GET_INSTANCE(config); >- >- instance_10 = instance & 0x3; >- instance_5432 = (instance >> 2) & 0xf; >- instance_76 = (instance >> 6) & 0x3; >+ eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; >+ unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; >+ instance = DF_V3_6_GET_INSTANCE(config); >+ >+ instance_10 = instance & 0x3; >+ instance_5432 = (instance >> 2) & 0xf; >+ instance_76 = (instance >> 6) & 0x3; > >- *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel; >- *hi_val = (instance_76 << 29) | instance_5432; >- } >+ *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22); >+ *hi_val = (instance_76 << 29) | instance_5432; >+ >+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", >+ config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val); > > return 0; > } > >-/* assign df performance counters for read */ >-static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, >- uint64_t config, >- int *is_assigned) >+/* add df performance counters for read */ >+static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, >+ uint64_t config) > { > int i, target_cntr; > >- *is_assigned = 0; >- > target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); > >- if (target_cntr >= 0) { >- *is_assigned = 1; >+ if (target_cntr >= 0) > return 0; >- } > > for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { > if (adev->df_perfmon_config_assign_mask[i] == 0U) { >@@ -344,45 +431,13 @@ > if ((lo_base_addr == 0) || 
(hi_base_addr == 0)) > return; > >- WREG32_PCIE(lo_base_addr, 0UL); >- WREG32_PCIE(hi_base_addr, 0UL); >-} >- >- >-static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev, >- uint64_t config) >-{ >- uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; >- int ret, is_assigned; >- >- ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); >- >- if (ret || is_assigned) >- return ret; >- >- ret = df_v3_6_pmc_get_ctrl_settings(adev, >- config, >- &lo_base_addr, >- &hi_base_addr, >- &lo_val, >- &hi_val); >- >- if (ret) >- return ret; >- >- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", >- config, lo_base_addr, hi_base_addr, lo_val, hi_val); >- >- WREG32_PCIE(lo_base_addr, lo_val); >- WREG32_PCIE(hi_base_addr, hi_val); >- >- return ret; >+ df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); > } > > static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, > int is_enable) > { >- uint32_t lo_base_addr, hi_base_addr, lo_val; >+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; > int ret = 0; > > switch (adev->asic_type) { >@@ -391,24 +446,20 @@ > df_v3_6_reset_perfmon_cntr(adev, config); > > if (is_enable) { >- ret = df_v3_6_add_perfmon_cntr(adev, config); >+ ret = df_v3_6_pmc_add_cntr(adev, config); > } else { > ret = df_v3_6_pmc_get_ctrl_settings(adev, > config, > &lo_base_addr, > &hi_base_addr, >- NULL, >- NULL); >+ &lo_val, >+ &hi_val); > > if (ret) > return ret; > >- lo_val = RREG32_PCIE(lo_base_addr); >- >- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", >- config, lo_base_addr, hi_base_addr, lo_val); >- >- WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); >+ df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val, >+ hi_base_addr, hi_val); > } > > break; >@@ -422,7 +473,7 @@ > static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, > int is_disable) > { >- uint32_t lo_base_addr, hi_base_addr, lo_val; >+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; > int ret = 0; > > switch (adev->asic_type) { >@@ -431,18 +482,13 @@ > config, > &lo_base_addr, > &hi_base_addr, >- NULL, >- NULL); >+ &lo_val, >+ &hi_val); > > if (ret) > return ret; > >- lo_val = RREG32_PCIE(lo_base_addr); >- >- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", >- config, lo_base_addr, hi_base_addr, lo_val); >- >- WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); >+ df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); > > if (is_disable) > df_v3_6_pmc_release_cntr(adev, config); >@@ -471,8 +517,8 @@ > if ((lo_base_addr == 0) || (hi_base_addr == 0)) > return; > >- lo_val = RREG32_PCIE(lo_base_addr); >- hi_val = RREG32_PCIE(hi_base_addr); >+ df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val, >+ hi_base_addr, &hi_val); > > *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); > >@@ -480,7 +526,7 @@ > *count = 0; > > DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", >- config, lo_base_addr, hi_base_addr, lo_val, hi_val); >+ config, lo_base_addr, hi_base_addr, lo_val, hi_val); > > break; > >@@ -499,5 +545,7 @@ > .get_clockgating_state = df_v3_6_get_clockgating_state, > .pmc_start = df_v3_6_pmc_start, > .pmc_stop = df_v3_6_pmc_stop, >- .pmc_get_count = df_v3_6_pmc_get_count >+ .pmc_get_count = df_v3_6_pmc_get_count, >+ .get_fica = df_v3_6_get_fica, >+ .set_fica = df_v3_6_set_fica > }; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ 
linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 2019-08-31 15:01:11.847736167 -0500 >@@ -357,7 +357,7 @@ > > void gfxhub_v1_0_init(struct amdgpu_device *adev) > { >- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; >+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; > > hub->ctx0_ptb_addr_lo32 = > SOC15_REG_OFFSET(GC, 0, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 2019-08-31 15:01:11.847736167 -0500 >@@ -140,7 +140,7 @@ > /* XXX for emulation, Refer to closed source code.*/ > tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, > L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); >- tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); >+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); > tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); > tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); > WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp); >@@ -333,7 +333,7 @@ > > void gfxhub_v2_0_init(struct amdgpu_device *adev) > { >- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; >+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; > > hub->ctx0_ptb_addr_lo32 = > SOC15_REG_OFFSET(GC, 0, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 2019-08-31 15:01:12.277736205 -0500 >@@ -20,8 +20,12 @@ > * OTHER DEALINGS IN THE SOFTWARE. 
> * > */ >+ >+#include <linux/delay.h> >+#include <linux/kernel.h> > #include <linux/firmware.h> >-#include <drm/drmP.h> >+#include <linux/module.h> >+#include <linux/pci.h> > #include "amdgpu.h" > #include "amdgpu_gfx.h" > #include "amdgpu_psp.h" >@@ -56,6 +60,9 @@ > #define F32_CE_PROGRAM_RAM_SIZE 65536 > #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L > >+#define mmCGTT_GS_NGG_CLK_CTRL 0x5087 >+#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1 >+ > MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); > MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); > MODULE_FIRMWARE("amdgpu/navi10_me.bin"); >@@ -63,6 +70,20 @@ > MODULE_FIRMWARE("amdgpu/navi10_mec2.bin"); > MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); > >+MODULE_FIRMWARE("amdgpu/navi14_ce.bin"); >+MODULE_FIRMWARE("amdgpu/navi14_pfp.bin"); >+MODULE_FIRMWARE("amdgpu/navi14_me.bin"); >+MODULE_FIRMWARE("amdgpu/navi14_mec.bin"); >+MODULE_FIRMWARE("amdgpu/navi14_mec2.bin"); >+MODULE_FIRMWARE("amdgpu/navi14_rlc.bin"); >+ >+MODULE_FIRMWARE("amdgpu/navi12_ce.bin"); >+MODULE_FIRMWARE("amdgpu/navi12_pfp.bin"); >+MODULE_FIRMWARE("amdgpu/navi12_me.bin"); >+MODULE_FIRMWARE("amdgpu/navi12_mec.bin"); >+MODULE_FIRMWARE("amdgpu/navi12_mec2.bin"); >+MODULE_FIRMWARE("amdgpu/navi12_rlc.bin"); >+ > static const struct soc15_reg_golden golden_settings_gc_10_1[] = > { > SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), >@@ -109,6 +130,99 @@ > /* Pending on emulation bring up */ > }; > >+static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = >+{ >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000), >+}; >+ >+static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = >+{ >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) >+}; >+ >+static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = >+{ >+ /* Pending on emulation bring up */ >+}; >+ >+static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = >+{ >+ /* Pending on emulation bring up */ >+}; >+ > #define DEFAULT_SH_MEM_CONFIG \ > ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ > (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ >@@ -250,6 +364,22 @@ > golden_settings_gc_10_0_nv10, > (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10)); > break; >+ case CHIP_NAVI14: >+ soc15_program_register_sequence(adev, >+ golden_settings_gc_10_1_1, >+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1)); >+ soc15_program_register_sequence(adev, >+ golden_settings_gc_10_1_nv14, >+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14)); >+ break; >+ case CHIP_NAVI12: >+ soc15_program_register_sequence(adev, >+ golden_settings_gc_10_1_2, >+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2)); >+ soc15_program_register_sequence(adev, >+ golden_settings_gc_10_1_2_nv12, >+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12)); >+ break; > default: > break; > } >@@ -331,7 +461,7 @@ > if (amdgpu_emu_mode == 1) > msleep(1); > else >- DRM_UDELAY(1); >+ udelay(1); > } > if (i < adev->usec_timeout) { > if (amdgpu_emu_mode == 1) >@@ -481,6 +611,12 @@ > case CHIP_NAVI10: > chip_name = "navi10"; > break; >+ case CHIP_NAVI14: >+ chip_name = "navi14"; >+ break; >+ case CHIP_NAVI12: >+ chip_name = "navi12"; >+ break; > default: > BUG(); > } >@@ -1026,6 +1162,8 @@ > > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > adev->gfx.config.max_hw_contexts = 8; > adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; > adev->gfx.config.sc_prim_fifo_size_backend = 0x100; >@@ -1133,6 +1271,8 @@ > > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > adev->gfx.me.num_me = 1; > adev->gfx.me.num_pipe_per_me = 2; > adev->gfx.me.num_queue_per_pipe = 1; >@@ -1452,6 +1592,25 @@ > } > } > >+static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) >+{ >+ int vmid; >+ >+ /* >+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA >+ * access. Compute VMIDs should be enabled by FW for target VMIDs, >+ * the driver can enable them for graphics. VMID0 should maintain >+ * access so that HWS firmware can save/restore entries. 
>+ */ >+ for (vmid = 1; vmid < 16; vmid++) { >+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); >+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); >+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); >+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); >+ } >+} >+ >+ > static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) > { > int i, j, k; >@@ -1461,7 +1620,8 @@ > u32 utcl_invreq_disable = 0; > /* > * GCRD_TARGETS_DISABLE field contains >- * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0] >+ * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0] >+ * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0] > */ > u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask( > 2 * max_wgp_per_sh + /* TCP */ >@@ -1469,7 +1629,8 @@ > 4); /* GL1C */ > /* > * UTCL1_UTCL0_INVREQ_DISABLE field contains >- * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] >+ * for Navi10/Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] >+ * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0] > */ > u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask( > 2 * max_wgp_per_sh + /* TCP */ >@@ -1477,7 +1638,9 @@ > 4 + /* RMI */ > 1); /* SQG */ > >- if (adev->asic_type == CHIP_NAVI10) { >+ if (adev->asic_type == CHIP_NAVI10 || >+ adev->asic_type == CHIP_NAVI14 || >+ adev->asic_type == CHIP_NAVI12) { > mutex_lock(&adev->grbm_idx_mutex); > for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { >@@ -1535,7 +1698,7 @@ > /* XXX SH_MEM regs */ > /* where to put LDS, scratch, GPUVM in FSA64 space */ > mutex_lock(&adev->srbm_mutex); >- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { >+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { > nv_grbm_select(adev, 0, 0, 0, i); > /* CP and shaders */ > WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); >@@ -1552,6 +1715,7 @@ > mutex_unlock(&adev->srbm_mutex); > > gfx_v10_0_init_compute_vmid(adev); >+ gfx_v10_0_init_gds_vmid(adev); > > } > >@@ -1584,9 +1748,12 @@ > > static void gfx_v10_0_init_pg(struct amdgpu_device *adev) > { >+ int i; >+ > gfx_v10_0_init_csb(adev); > >- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); >+ for (i = 0; i < adev->num_vmhubs; i++) >+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); > > /* TODO: init power gating */ > return; >@@ -1624,9 +1791,9 @@ > * hence no handshake between SMU & RLC > * GFXOFF will be disabled > */ >- rlc_pg_cntl |= 0x80000; >+ rlc_pg_cntl |= 0x800000; > } else >- rlc_pg_cntl &= ~0x80000; >+ rlc_pg_cntl &= ~0x800000; > WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl); > } > >@@ -3614,20 +3781,12 @@ > > static int gfx_v10_0_suspend(void *handle) > { >- struct amdgpu_device *adev = (struct amdgpu_device *)handle; >- >- adev->in_suspend = true; >- return gfx_v10_0_hw_fini(adev); >+ return gfx_v10_0_hw_fini(handle); > } > > static int gfx_v10_0_resume(void *handle) > { >- struct amdgpu_device *adev = (struct amdgpu_device *)handle; >- int r; >- >- r = gfx_v10_0_hw_init(adev); >- adev->in_suspend = false; >- return r; >+ return gfx_v10_0_hw_init(handle); > } > > static bool gfx_v10_0_is_idle(void *handle) >@@ -4037,6 +4196,7 @@ > bool enable = (state == AMD_PG_STATE_GATE) ? 
true : false; > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: > if (!enable) { > amdgpu_gfx_off_ctrl(adev, false); > cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); >@@ -4056,6 +4216,8 @@ > > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > gfx_v10_0_update_gfx_clock_gating(adev, > state == AMD_CG_STATE_GATE ? true : false); > break; >@@ -4453,7 +4615,7 @@ > if (ring->trail_seq == > le32_to_cpu(*(ring->trail_fence_cpu_addr))) > break; >- DRM_UDELAY(1); >+ udelay(1); > } > > if (i >= adev->usec_timeout) { >@@ -4927,7 +5089,7 @@ > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, >- .vmhub = AMDGPU_GFXHUB, >+ .vmhub = AMDGPU_GFXHUB_0, > .get_rptr = gfx_v10_0_ring_get_rptr_gfx, > .get_wptr = gfx_v10_0_ring_get_wptr_gfx, > .set_wptr = gfx_v10_0_ring_set_wptr_gfx, >@@ -4978,7 +5140,7 @@ > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, >- .vmhub = AMDGPU_GFXHUB, >+ .vmhub = AMDGPU_GFXHUB_0, > .get_rptr = gfx_v10_0_ring_get_rptr_compute, > .get_wptr = gfx_v10_0_ring_get_wptr_compute, > .set_wptr = gfx_v10_0_ring_set_wptr_compute, >@@ -5011,7 +5173,7 @@ > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, >- .vmhub = AMDGPU_GFXHUB, >+ .vmhub = AMDGPU_GFXHUB_0, > .get_rptr = gfx_v10_0_ring_get_rptr_compute, > .get_wptr = gfx_v10_0_ring_get_wptr_compute, > .set_wptr = gfx_v10_0_ring_set_wptr_compute, >@@ -5088,6 +5250,8 @@ > { > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; > break; > default: >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 2019-08-31 15:01:11.847736167 -0500 >@@ -1890,6 +1890,24 @@ > } > } > >+static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) >+{ >+ int vmid; >+ >+ /* >+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA >+ * access. Compute VMIDs should be enabled by FW for target VMIDs, >+ * the driver can enable them for graphics. VMID0 should maintain >+ * access so that HWS firmware can save/restore entries. >+ */ >+ for (vmid = 1; vmid < 16; vmid++) { >+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); >+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); >+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); >+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); >+ } >+} >+ > static void gfx_v7_0_config_init(struct amdgpu_device *adev) > { > adev->gfx.config.double_offchip_lds_buf = 1; >@@ -1968,6 +1986,7 @@ > mutex_unlock(&adev->srbm_mutex); > > gfx_v7_0_init_compute_vmid(adev); >+ gfx_v7_0_init_gds_vmid(adev); > > WREG32(mmSX_DEBUG_1, 0x20); > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 2019-08-31 15:01:12.274736205 -0500 >@@ -3750,6 +3750,24 @@ > } > } > >+static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) >+{ >+ int vmid; >+ >+ /* >+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA >+ * access. 
Compute VMIDs should be enabled by FW for target VMIDs, >+ * the driver can enable them for graphics. VMID0 should maintain >+ * access so that HWS firmware can save/restore entries. >+ */ >+ for (vmid = 1; vmid < 16; vmid++) { >+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); >+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); >+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); >+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); >+ } >+} >+ > static void gfx_v8_0_config_init(struct amdgpu_device *adev) > { > switch (adev->asic_type) { >@@ -3816,6 +3834,7 @@ > mutex_unlock(&adev->srbm_mutex); > > gfx_v8_0_init_compute_vmid(adev); >+ gfx_v8_0_init_gds_vmid(adev); > > mutex_lock(&adev->grbm_idx_mutex); > /* >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 2019-08-31 15:09:16.619778999 -0500 >@@ -36,10 +36,10 @@ > > #include "gc/gc_9_0_offset.h" > #include "gc/gc_9_0_sh_mask.h" >+ > #include "vega10_enum.h" > #include "hdp/hdp_4_0_offset.h" > >-#include "soc15.h" > #include "soc15_common.h" > #include "clearstate_gfx9.h" > #include "v9_structs.h" >@@ -60,6 +60,9 @@ > #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L > #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L > >+#define mmGCEA_PROBE_MAP 0x070c >+#define mmGCEA_PROBE_MAP_BASE_IDX 0 >+ > MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); > MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); > MODULE_FIRMWARE("amdgpu/vega10_me.bin"); >@@ -104,6 +107,397 @@ > MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); > MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); > >+MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); >+MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); >+MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); >+ >+MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); >+MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); >+MODULE_FIRMWARE("amdgpu/renoir_me.bin"); >+MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); >+MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); >+MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); >+ >+#define mmTCP_CHAN_STEER_0_ARCT 0x0b03 >+#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 >+#define mmTCP_CHAN_STEER_1_ARCT 0x0b04 >+#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 >+#define mmTCP_CHAN_STEER_2_ARCT 0x0b09 >+#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 >+#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a >+#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 >+#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b >+#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 >+#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c >+#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 >+ >+enum ta_ras_gfx_subblock { >+ /*CPC*/ >+ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, >+ TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, >+ TA_RAS_BLOCK__GFX_CPC_UCODE, >+ TA_RAS_BLOCK__GFX_DC_STATE_ME1, >+ TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, >+ TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, >+ TA_RAS_BLOCK__GFX_DC_STATE_ME2, >+ TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, >+ TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, >+ TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, >+ /* CPF*/ >+ TA_RAS_BLOCK__GFX_CPF_INDEX_START, >+ TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, >+ TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, >+ TA_RAS_BLOCK__GFX_CPF_TAG, >+ TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, >+ /* CPG*/ >+ TA_RAS_BLOCK__GFX_CPG_INDEX_START, >+ TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, >+ 
TA_RAS_BLOCK__GFX_CPG_DMA_TAG, >+ TA_RAS_BLOCK__GFX_CPG_TAG, >+ TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, >+ /* GDS*/ >+ TA_RAS_BLOCK__GFX_GDS_INDEX_START, >+ TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, >+ TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, >+ TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, >+ TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, >+ TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, >+ TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, >+ /* SPI*/ >+ TA_RAS_BLOCK__GFX_SPI_SR_MEM, >+ /* SQ*/ >+ TA_RAS_BLOCK__GFX_SQ_INDEX_START, >+ TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, >+ TA_RAS_BLOCK__GFX_SQ_LDS_D, >+ TA_RAS_BLOCK__GFX_SQ_LDS_I, >+ TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ >+ TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, >+ /* SQC (3 ranges)*/ >+ TA_RAS_BLOCK__GFX_SQC_INDEX_START, >+ /* SQC range 0*/ >+ TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, >+ TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = >+ TA_RAS_BLOCK__GFX_SQC_INDEX0_START, >+ TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, >+ TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, >+ TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, >+ TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, >+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, >+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, >+ TA_RAS_BLOCK__GFX_SQC_INDEX0_END = >+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, >+ /* SQC range 1*/ >+ TA_RAS_BLOCK__GFX_SQC_INDEX1_START, >+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = >+ TA_RAS_BLOCK__GFX_SQC_INDEX1_START, >+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, >+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, >+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, >+ TA_RAS_BLOCK__GFX_SQC_INDEX1_END = >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, >+ /* SQC range 2*/ >+ TA_RAS_BLOCK__GFX_SQC_INDEX2_START, >+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = >+ TA_RAS_BLOCK__GFX_SQC_INDEX2_START, >+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, >+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, >+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, >+ TA_RAS_BLOCK__GFX_SQC_INDEX2_END = >+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, >+ TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, >+ /* TA*/ >+ TA_RAS_BLOCK__GFX_TA_INDEX_START, >+ TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, >+ TA_RAS_BLOCK__GFX_TA_FS_AFIFO, >+ TA_RAS_BLOCK__GFX_TA_FL_LFIFO, >+ TA_RAS_BLOCK__GFX_TA_FX_LFIFO, >+ TA_RAS_BLOCK__GFX_TA_FS_CFIFO, >+ TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, >+ /* TCA*/ >+ TA_RAS_BLOCK__GFX_TCA_INDEX_START, >+ TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, >+ TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, >+ TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, >+ /* TCC (5 sub-ranges)*/ >+ TA_RAS_BLOCK__GFX_TCC_INDEX_START, >+ /* TCC range 0*/ >+ TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, >+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, >+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, >+ 
TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, >+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, >+ TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, >+ TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, >+ TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, >+ TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, >+ TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, >+ /* TCC range 1*/ >+ TA_RAS_BLOCK__GFX_TCC_INDEX1_START, >+ TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, >+ TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, >+ TA_RAS_BLOCK__GFX_TCC_INDEX1_END = >+ TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, >+ /* TCC range 2*/ >+ TA_RAS_BLOCK__GFX_TCC_INDEX2_START, >+ TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, >+ TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, >+ TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, >+ TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, >+ TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, >+ TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, >+ TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, >+ TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, >+ TA_RAS_BLOCK__GFX_TCC_INDEX2_END = >+ TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, >+ /* TCC range 3*/ >+ TA_RAS_BLOCK__GFX_TCC_INDEX3_START, >+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, >+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, >+ TA_RAS_BLOCK__GFX_TCC_INDEX3_END = >+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, >+ /* TCC range 4*/ >+ TA_RAS_BLOCK__GFX_TCC_INDEX4_START, >+ TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = >+ TA_RAS_BLOCK__GFX_TCC_INDEX4_START, >+ TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, >+ TA_RAS_BLOCK__GFX_TCC_INDEX4_END = >+ TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, >+ TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, >+ /* TCI*/ >+ TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, >+ /* TCP*/ >+ TA_RAS_BLOCK__GFX_TCP_INDEX_START, >+ TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, >+ TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, >+ TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, >+ TA_RAS_BLOCK__GFX_TCP_VM_FIFO, >+ TA_RAS_BLOCK__GFX_TCP_DB_RAM, >+ TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, >+ TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, >+ TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, >+ /* TD*/ >+ TA_RAS_BLOCK__GFX_TD_INDEX_START, >+ TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, >+ TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, >+ TA_RAS_BLOCK__GFX_TD_CS_FIFO, >+ TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, >+ /* EA (3 sub-ranges)*/ >+ TA_RAS_BLOCK__GFX_EA_INDEX_START, >+ /* EA range 0*/ >+ TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, >+ TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, >+ TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, >+ TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, >+ TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, >+ TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, >+ TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, >+ TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, >+ TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, >+ TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, >+ /* EA range 1*/ >+ TA_RAS_BLOCK__GFX_EA_INDEX1_START, >+ TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, >+ TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, >+ TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, >+ TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, >+ TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, >+ TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, >+ TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, >+ TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, >+ /* EA range 2*/ >+ TA_RAS_BLOCK__GFX_EA_INDEX2_START, >+ TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = 
TA_RAS_BLOCK__GFX_EA_INDEX2_START, >+ TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, >+ TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, >+ TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, >+ TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, >+ TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, >+ /* UTC VM L2 bank*/ >+ TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, >+ /* UTC VM walker*/ >+ TA_RAS_BLOCK__UTC_VML2_WALKER, >+ /* UTC ATC L2 2MB cache*/ >+ TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, >+ /* UTC ATC L2 4KB cache*/ >+ TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, >+ TA_RAS_BLOCK__GFX_MAX >+}; >+ >+struct ras_gfx_subblock { >+ unsigned char *name; >+ int ta_subblock; >+ int hw_supported_error_type; >+ int sw_supported_error_type; >+}; >+ >+#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ >+ [AMDGPU_RAS_BLOCK__##subblock] = { \ >+ #subblock, \ >+ TA_RAS_BLOCK__##subblock, \ >+ ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ >+ (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ >+ } >+ >+static const struct ras_gfx_subblock ras_gfx_subblocks[] = { >+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, >+ 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, >+ 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, >+ 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, >+ 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, >+ 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, >+ 0), 
>+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, >+ 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, >+ 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, >+ 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, >+ 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, >+ 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, >+ 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, >+ 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, >+ 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ 
AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), >+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), >+}; >+ > static const struct soc15_reg_golden golden_settings_gc_9_0[] = > { > SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), >@@ -227,6 +621,22 @@ > SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), > }; > >+static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = >+{ >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), >+}; >+ > static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = > { > SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), >@@ -271,6 +681,18 @@ > SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 
0x00000000, 0x00008000) > }; > >+static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = >+{ >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), >+}; >+ > static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = > { > mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, >@@ -310,19 +732,21 @@ > static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); > static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); > static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); >+static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, >+ void *ras_error_status); >+static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, >+ void *inject_if); > > static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) > { > switch (adev->asic_type) { > case CHIP_VEGA10: >- if (!amdgpu_virt_support_skip_setting(adev)) { >- soc15_program_register_sequence(adev, >- golden_settings_gc_9_0, >- ARRAY_SIZE(golden_settings_gc_9_0)); >- soc15_program_register_sequence(adev, >- golden_settings_gc_9_0_vg10, >- ARRAY_SIZE(golden_settings_gc_9_0_vg10)); >- } >+ soc15_program_register_sequence(adev, >+ golden_settings_gc_9_0, >+ ARRAY_SIZE(golden_settings_gc_9_0)); >+ soc15_program_register_sequence(adev, >+ golden_settings_gc_9_0_vg10, >+ ARRAY_SIZE(golden_settings_gc_9_0_vg10)); > break; > case CHIP_VEGA12: > soc15_program_register_sequence(adev, >@@ -340,6 +764,11 @@ > golden_settings_gc_9_0_vg20, > ARRAY_SIZE(golden_settings_gc_9_0_vg20)); > break; >+ case CHIP_ARCTURUS: >+ soc15_program_register_sequence(adev, >+ golden_settings_gc_9_4_1_arct, >+ ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); >+ break; > case CHIP_RAVEN: > soc15_program_register_sequence(adev, golden_settings_gc_9_1, > ARRAY_SIZE(golden_settings_gc_9_1)); >@@ -352,12 +781,18 @@ > golden_settings_gc_9_1_rv1, > ARRAY_SIZE(golden_settings_gc_9_1_rv1)); > break; >+ case CHIP_RENOIR: >+ soc15_program_register_sequence(adev, >+ golden_settings_gc_9_1_rn, >+ ARRAY_SIZE(golden_settings_gc_9_1_rn)); >+ return; /* for renoir, don't need common golden settings */ > default: > break; > } > >- soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, >- (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); >+ if (adev->asic_type != CHIP_ARCTURUS) >+ soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, >+ (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); > } > > static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) >@@ -596,14 +1031,14 @@ > case CHIP_VEGA20: > break; > case CHIP_RAVEN: >- if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) >- break; >- if ((adev->gfx.rlc_fw_version != 106 && >- adev->gfx.rlc_fw_version < 531) || >- (adev->gfx.rlc_fw_version == 53815) || >- (adev->gfx.rlc_feature_version < 1) || >- !adev->gfx.rlc.is_rlc_v2_1) >+ if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) >+ && ((adev->gfx.rlc_fw_version != 106 && 
>+ adev->gfx.rlc_fw_version < 531) || >+ (adev->gfx.rlc_fw_version == 53815) || >+ (adev->gfx.rlc_feature_version < 1) || >+ !adev->gfx.rlc.is_rlc_v2_1)) > adev->pm.pp_feature &= ~PP_GFXOFF_MASK; >+ > if (adev->pm.pp_feature & PP_GFXOFF_MASK) > adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | > AMD_PG_SUPPORT_CP | >@@ -614,44 +1049,14 @@ > } > } > >-static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) >+static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, >+ const char *chip_name) > { >- const char *chip_name; > char fw_name[30]; > int err; > struct amdgpu_firmware_info *info = NULL; > const struct common_firmware_header *header = NULL; > const struct gfx_firmware_header_v1_0 *cp_hdr; >- const struct rlc_firmware_header_v2_0 *rlc_hdr; >- unsigned int *tmp = NULL; >- unsigned int i = 0; >- uint16_t version_major; >- uint16_t version_minor; >- uint32_t smu_version; >- >- DRM_DEBUG("\n"); >- >- switch (adev->asic_type) { >- case CHIP_VEGA10: >- chip_name = "vega10"; >- break; >- case CHIP_VEGA12: >- chip_name = "vega12"; >- break; >- case CHIP_VEGA20: >- chip_name = "vega20"; >- break; >- case CHIP_RAVEN: >- if (adev->rev_id >= 8) >- chip_name = "raven2"; >- else if (adev->pdev->device == 0x15d8) >- chip_name = "picasso"; >- else >- chip_name = "raven"; >- break; >- default: >- BUG(); >- } > > snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); > err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); >@@ -686,6 +1091,58 @@ > adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); > adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); > >+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { >+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; >+ info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; >+ info->fw = adev->gfx.pfp_fw; >+ header = (const struct common_firmware_header *)info->fw->data; >+ adev->firmware.fw_size += >+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); >+ >+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; >+ info->ucode_id = AMDGPU_UCODE_ID_CP_ME; >+ info->fw = adev->gfx.me_fw; >+ header = (const struct common_firmware_header *)info->fw->data; >+ adev->firmware.fw_size += >+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); >+ >+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; >+ info->ucode_id = AMDGPU_UCODE_ID_CP_CE; >+ info->fw = adev->gfx.ce_fw; >+ header = (const struct common_firmware_header *)info->fw->data; >+ adev->firmware.fw_size += >+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); >+ } >+ >+out: >+ if (err) { >+ dev_err(adev->dev, >+ "gfx9: Failed to load firmware \"%s\"\n", >+ fw_name); >+ release_firmware(adev->gfx.pfp_fw); >+ adev->gfx.pfp_fw = NULL; >+ release_firmware(adev->gfx.me_fw); >+ adev->gfx.me_fw = NULL; >+ release_firmware(adev->gfx.ce_fw); >+ adev->gfx.ce_fw = NULL; >+ } >+ return err; >+} >+ >+static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, >+ const char *chip_name) >+{ >+ char fw_name[30]; >+ int err; >+ struct amdgpu_firmware_info *info = NULL; >+ const struct common_firmware_header *header = NULL; >+ const struct rlc_firmware_header_v2_0 *rlc_hdr; >+ unsigned int *tmp = NULL; >+ unsigned int i = 0; >+ uint16_t version_major; >+ uint16_t version_minor; >+ uint32_t smu_version; >+ > /* > * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin > * instead of picasso_rlc.bin. 
>@@ -760,57 +1217,7 @@ > if (adev->gfx.rlc.is_rlc_v2_1) > gfx_v9_0_init_rlc_ext_microcode(adev); > >- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); >- err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); >- if (err) >- goto out; >- err = amdgpu_ucode_validate(adev->gfx.mec_fw); >- if (err) >- goto out; >- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; >- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); >- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); >- >- >- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); >- err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); >- if (!err) { >- err = amdgpu_ucode_validate(adev->gfx.mec2_fw); >- if (err) >- goto out; >- cp_hdr = (const struct gfx_firmware_header_v1_0 *) >- adev->gfx.mec2_fw->data; >- adev->gfx.mec2_fw_version = >- le32_to_cpu(cp_hdr->header.ucode_version); >- adev->gfx.mec2_feature_version = >- le32_to_cpu(cp_hdr->ucode_feature_version); >- } else { >- err = 0; >- adev->gfx.mec2_fw = NULL; >- } >- > if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { >- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; >- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; >- info->fw = adev->gfx.pfp_fw; >- header = (const struct common_firmware_header *)info->fw->data; >- adev->firmware.fw_size += >- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); >- >- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; >- info->ucode_id = AMDGPU_UCODE_ID_CP_ME; >- info->fw = adev->gfx.me_fw; >- header = (const struct common_firmware_header *)info->fw->data; >- adev->firmware.fw_size += >- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); >- >- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; >- info->ucode_id = AMDGPU_UCODE_ID_CP_CE; >- info->fw = adev->gfx.ce_fw; >- header = (const struct common_firmware_header *)info->fw->data; >- adev->firmware.fw_size += >- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); >- > info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; > info->ucode_id = AMDGPU_UCODE_ID_RLC_G; > info->fw = adev->gfx.rlc_fw; >@@ -840,7 +1247,58 @@ > adev->firmware.fw_size += > ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); > } >+ } > >+out: >+ if (err) { >+ dev_err(adev->dev, >+ "gfx9: Failed to load firmware \"%s\"\n", >+ fw_name); >+ release_firmware(adev->gfx.rlc_fw); >+ adev->gfx.rlc_fw = NULL; >+ } >+ return err; >+} >+ >+static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, >+ const char *chip_name) >+{ >+ char fw_name[30]; >+ int err; >+ struct amdgpu_firmware_info *info = NULL; >+ const struct common_firmware_header *header = NULL; >+ const struct gfx_firmware_header_v1_0 *cp_hdr; >+ >+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); >+ err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); >+ if (err) >+ goto out; >+ err = amdgpu_ucode_validate(adev->gfx.mec_fw); >+ if (err) >+ goto out; >+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; >+ adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); >+ adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); >+ >+ >+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); >+ err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); >+ if (!err) { >+ err = amdgpu_ucode_validate(adev->gfx.mec2_fw); >+ if (err) >+ goto out; >+ cp_hdr = (const struct gfx_firmware_header_v1_0 *) >+ 
adev->gfx.mec2_fw->data; >+ adev->gfx.mec2_fw_version = >+ le32_to_cpu(cp_hdr->header.ucode_version); >+ adev->gfx.mec2_feature_version = >+ le32_to_cpu(cp_hdr->ucode_feature_version); >+ } else { >+ err = 0; >+ adev->gfx.mec2_fw = NULL; >+ } >+ >+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { > info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; > info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; > info->fw = adev->gfx.mec_fw; >@@ -863,13 +1321,18 @@ > cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; > adev->firmware.fw_size += > ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); >- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; >- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; >- info->fw = adev->gfx.mec2_fw; >- adev->firmware.fw_size += >- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); >- } > >+ /* TODO: Determine if MEC2 JT FW loading can be removed >+ for all GFX V9 asic and above */ >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; >+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; >+ info->fw = adev->gfx.mec2_fw; >+ adev->firmware.fw_size += >+ ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, >+ PAGE_SIZE); >+ } >+ } > } > > out: >@@ -879,14 +1342,6 @@ > dev_err(adev->dev, > "gfx9: Failed to load firmware \"%s\"\n", > fw_name); >- release_firmware(adev->gfx.pfp_fw); >- adev->gfx.pfp_fw = NULL; >- release_firmware(adev->gfx.me_fw); >- adev->gfx.me_fw = NULL; >- release_firmware(adev->gfx.ce_fw); >- adev->gfx.ce_fw = NULL; >- release_firmware(adev->gfx.rlc_fw); >- adev->gfx.rlc_fw = NULL; > release_firmware(adev->gfx.mec_fw); > adev->gfx.mec_fw = NULL; > release_firmware(adev->gfx.mec2_fw); >@@ -895,6 +1350,59 @@ > return err; > } > >+static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) >+{ >+ const char *chip_name; >+ int r; >+ >+ DRM_DEBUG("\n"); >+ >+ switch (adev->asic_type) { >+ case CHIP_VEGA10: >+ chip_name = "vega10"; >+ break; >+ case CHIP_VEGA12: >+ chip_name = "vega12"; >+ break; >+ case CHIP_VEGA20: >+ chip_name = "vega20"; >+ break; >+ case CHIP_RAVEN: >+ if (adev->rev_id >= 8) >+ chip_name = "raven2"; >+ else if (adev->pdev->device == 0x15d8) >+ chip_name = "picasso"; >+ else >+ chip_name = "raven"; >+ break; >+ case CHIP_ARCTURUS: >+ chip_name = "arcturus"; >+ break; >+ case CHIP_RENOIR: >+ chip_name = "renoir"; >+ break; >+ default: >+ BUG(); >+ } >+ >+ /* No CPG in Arcturus */ >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); >+ if (r) >+ return r; >+ } >+ >+ r = gfx_v9_0_init_rlc_microcode(adev, chip_name); >+ if (r) >+ return r; >+ >+ r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); >+ if (r) >+ return r; >+ >+ return r; >+} >+ > static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) > { > u32 count = 0; >@@ -1132,7 +1640,7 @@ > return r; > } > >- if (adev->asic_type == CHIP_RAVEN) { >+ if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { > /* TODO: double check the cp_table_size for RV */ > adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ > r = amdgpu_gfx_rlc_init_cpt(adev); >@@ -1142,6 +1650,7 @@ > > switch (adev->asic_type) { > case CHIP_RAVEN: >+ case CHIP_RENOIR: > gfx_v9_0_init_lbpw(adev); > break; > case CHIP_VEGA20: >@@ -1328,7 +1837,9 @@ > .read_wave_data = &gfx_v9_0_read_wave_data, > .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, > .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, >- .select_me_pipe_q = 
&gfx_v9_0_select_me_pipe_q >+ .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, >+ .ras_error_inject = &gfx_v9_0_ras_error_inject, >+ .query_ras_error_count = &gfx_v9_0_query_ras_error_count > }; > > static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) >@@ -1381,6 +1892,26 @@ > else > gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; > break; >+ case CHIP_ARCTURUS: >+ adev->gfx.config.max_hw_contexts = 8; >+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; >+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100; >+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; >+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; >+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); >+ gb_addr_config &= ~0xf3e777ff; >+ gb_addr_config |= 0x22014042; >+ break; >+ case CHIP_RENOIR: >+ adev->gfx.config.max_hw_contexts = 8; >+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; >+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100; >+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; >+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; >+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); >+ gb_addr_config &= ~0xf3e777ff; >+ gb_addr_config |= 0x22010042; >+ break; > default: > BUG(); > break; >@@ -1657,6 +2188,8 @@ > case CHIP_VEGA12: > case CHIP_VEGA20: > case CHIP_RAVEN: >+ case CHIP_ARCTURUS: >+ case CHIP_RENOIR: > adev->gfx.mec.num_mec = 2; > break; > default: >@@ -1814,7 +2347,7 @@ > gfx_v9_0_mec_fini(adev); > gfx_v9_0_ngg_fini(adev); > amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); >- if (adev->asic_type == CHIP_RAVEN) { >+ if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { > amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, > &adev->gfx.rlc.cp_table_gpu_addr, > (void **)&adev->gfx.rlc.cp_table_ptr); >@@ -1933,6 +2466,24 @@ > } > } > >+static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) >+{ >+ int vmid; >+ >+ /* >+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA >+ * access. Compute VMIDs should be enabled by FW for target VMIDs, >+ * the driver can enable them for graphics. VMID0 should maintain >+ * access so that HWS firmware can save/restore entries. 
>+ */ >+ for (vmid = 1; vmid < 16; vmid++) { >+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); >+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); >+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); >+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); >+ } >+} >+ > static void gfx_v9_0_constants_init(struct amdgpu_device *adev) > { > u32 tmp; >@@ -1949,7 +2500,7 @@ > /* XXX SH_MEM regs */ > /* where to put LDS, scratch, GPUVM in FSA64 space */ > mutex_lock(&adev->srbm_mutex); >- for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { >+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { > soc15_grbm_select(adev, 0, 0, 0, i); > /* CP and shaders */ > if (i == 0) { >@@ -1977,6 +2528,7 @@ > mutex_unlock(&adev->srbm_mutex); > > gfx_v9_0_init_compute_vmid(adev); >+ gfx_v9_0_init_gds_vmid(adev); > } > > static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) >@@ -2474,6 +3026,7 @@ > > switch (adev->asic_type) { > case CHIP_RAVEN: >+ case CHIP_RENOIR: > if (amdgpu_lbpw == 0) > gfx_v9_0_enable_lbpw(adev, false); > else >@@ -2853,6 +3406,10 @@ > mqd->compute_static_thread_mgmt_se1 = 0xffffffff; > mqd->compute_static_thread_mgmt_se2 = 0xffffffff; > mqd->compute_static_thread_mgmt_se3 = 0xffffffff; >+ mqd->compute_static_thread_mgmt_se4 = 0xffffffff; >+ mqd->compute_static_thread_mgmt_se5 = 0xffffffff; >+ mqd->compute_static_thread_mgmt_se6 = 0xffffffff; >+ mqd->compute_static_thread_mgmt_se7 = 0xffffffff; > mqd->compute_misc_reserved = 0x00000003; > > mqd->dynamic_cu_mask_addr_lo = >@@ -3256,10 +3813,12 @@ > gfx_v9_0_enable_gui_idle_interrupt(adev, false); > > if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { >- /* legacy firmware loading */ >- r = gfx_v9_0_cp_gfx_load_microcode(adev); >- if (r) >- return r; >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ /* legacy firmware loading */ >+ r = gfx_v9_0_cp_gfx_load_microcode(adev); >+ if (r) >+ return r; >+ } > > r = gfx_v9_0_cp_compute_load_microcode(adev); > if (r) >@@ -3270,18 +3829,22 @@ > if (r) > return r; > >- r = gfx_v9_0_cp_gfx_resume(adev); >- if (r) >- return r; >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ r = gfx_v9_0_cp_gfx_resume(adev); >+ if (r) >+ return r; >+ } > > r = gfx_v9_0_kcq_resume(adev); > if (r) > return r; > >- ring = &adev->gfx.gfx_ring[0]; >- r = amdgpu_ring_test_helper(ring); >- if (r) >- return r; >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ ring = &adev->gfx.gfx_ring[0]; >+ r = amdgpu_ring_test_helper(ring); >+ if (r) >+ return r; >+ } > > for (i = 0; i < adev->gfx.num_compute_rings; i++) { > ring = &adev->gfx.compute_ring[i]; >@@ -3295,7 +3858,8 @@ > > static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) > { >- gfx_v9_0_cp_gfx_enable(adev, enable); >+ if (adev->asic_type != CHIP_ARCTURUS) >+ gfx_v9_0_cp_gfx_enable(adev, enable); > gfx_v9_0_cp_compute_enable(adev, enable); > } > >@@ -3304,7 +3868,8 @@ > int r; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >- gfx_v9_0_init_golden_registers(adev); >+ if (!amdgpu_sriov_vf(adev)) >+ gfx_v9_0_init_golden_registers(adev); > > gfx_v9_0_constants_init(adev); > >@@ -3320,9 +3885,11 @@ > if (r) > return r; > >- r = gfx_v9_0_ngg_en(adev); >- if (r) >- return r; >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ r = gfx_v9_0_ngg_en(adev); >+ if (r) >+ return r; >+ } > > return r; > } >@@ -3470,8 +4037,9 @@ > /* stop the rlc */ > adev->gfx.rlc.funcs->stop(adev); > >- /* Disable GFX parsing/prefetching */ >- gfx_v9_0_cp_gfx_enable(adev, false); >+ if (adev->asic_type != 
CHIP_ARCTURUS) >+ /* Disable GFX parsing/prefetching */ >+ gfx_v9_0_cp_gfx_enable(adev, false); > > /* Disable MEC parsing/prefetching */ > gfx_v9_0_cp_compute_enable(adev, false); >@@ -3814,7 +4382,10 @@ > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >- adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; >+ if (adev->asic_type == CHIP_ARCTURUS) >+ adev->gfx.num_gfx_rings = 0; >+ else >+ adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; > adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; > gfx_v9_0_set_ring_funcs(adev); > gfx_v9_0_set_irq_funcs(adev); >@@ -3825,6 +4396,7 @@ > } > > static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, >+ struct ras_err_data *err_data, > struct amdgpu_iv_entry *entry); > > static int gfx_v9_0_ecc_late_init(void *handle) >@@ -3990,6 +4562,9 @@ > { > amdgpu_gfx_rlc_enter_safe_mode(adev); > >+ if (is_support_sw_smu(adev) && !enable) >+ smu_set_gfx_cgpg(&adev->smu, enable); >+ > if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { > gfx_v9_0_enable_gfx_cg_power_gating(adev, true); > if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) >@@ -4101,6 +4676,9 @@ > { > uint32_t data, def; > >+ if (adev->asic_type == CHIP_ARCTURUS) >+ return; >+ > amdgpu_gfx_rlc_enter_safe_mode(adev); > > /* Enable 3D CGCG/CGLS */ >@@ -4166,8 +4744,12 @@ > /* enable cgcg FSM(0x0000363F) */ > def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); > >- data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | >- RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; >+ if (adev->asic_type == CHIP_ARCTURUS) >+ data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | >+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; >+ else >+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | >+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; > if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) > data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | > RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; >@@ -4239,6 +4821,7 @@ > > switch (adev->asic_type) { > case CHIP_RAVEN: >+ case CHIP_RENOIR: > if (!enable) { > amdgpu_gfx_off_ctrl(adev, false); > cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); >@@ -4257,6 +4840,8 @@ > gfx_v9_0_enable_cp_power_gating(adev, false); > > /* update gfx cgpg state */ >+ if (is_support_sw_smu(adev) && enable) >+ smu_set_gfx_cgpg(&adev->smu, enable); > gfx_v9_0_update_gfx_cg_power_gating(adev, enable); > > /* update mgcg state */ >@@ -4293,6 +4878,8 @@ > case CHIP_VEGA12: > case CHIP_VEGA20: > case CHIP_RAVEN: >+ case CHIP_ARCTURUS: >+ case CHIP_RENOIR: > gfx_v9_0_update_gfx_clock_gating(adev, > state == AMD_CG_STATE_GATE ? 
true : false); > break; >@@ -4334,14 +4921,16 @@ > if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) > *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; > >- /* AMD_CG_SUPPORT_GFX_3D_CGCG */ >- data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); >- if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) >- *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; >- >- /* AMD_CG_SUPPORT_GFX_3D_CGLS */ >- if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) >- *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; >+ if (adev->asic_type != CHIP_ARCTURUS) { >+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */ >+ data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); >+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) >+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; >+ >+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */ >+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) >+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; >+ } > } > > static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) >@@ -5137,12 +5726,423 @@ > } > > static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, >+ struct ras_err_data *err_data, > struct amdgpu_iv_entry *entry) > { > /* TODO ue will trigger an interrupt. */ > kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); >+ if (adev->gfx.funcs->query_ras_error_count) >+ adev->gfx.funcs->query_ras_error_count(adev, err_data); > amdgpu_ras_reset_gpu(adev, 0); >- return AMDGPU_RAS_UE; >+ return AMDGPU_RAS_SUCCESS; >+} >+ >+static const struct { >+ const char *name; >+ uint32_t ip; >+ uint32_t inst; >+ uint32_t seg; >+ uint32_t reg_offset; >+ uint32_t per_se_instance; >+ int32_t num_instance; >+ uint32_t sec_count_mask; >+ uint32_t ded_count_mask; >+} gfx_ras_edc_regs[] = { >+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, >+ REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), >+ REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, >+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, >+ REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), >+ REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, >+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, >+ REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, >+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, >+ REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, >+ { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, >+ REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), >+ REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, >+ { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, >+ REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, >+ { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, >+ REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), >+ REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, >+ { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, >+ REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), >+ REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, >+ { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, >+ REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, >+ { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, >+ REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, >+ { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, >+ REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, >+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, >+ REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), >+ REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, >+ { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, >+ REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, >+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, 
mmGDS_EDC_OA_PHY_CNT), >+ 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), >+ REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, >+ { "GDS_OA_PHY_PHY_CMD_RAM_MEM", >+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, >+ REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), >+ REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, >+ { "GDS_OA_PHY_PHY_DATA_RAM_MEM", >+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, >+ REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, >+ { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", >+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, >+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), >+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, >+ { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", >+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, >+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), >+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, >+ { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", >+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, >+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), >+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, >+ { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", >+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, >+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), >+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, >+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, >+ REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, >+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), >+ REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, >+ { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, >+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, >+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, >+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, >+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, >+ REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, >+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, >+ REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, >+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), >+ REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, >+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), >+ REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, >+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), >+ REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, >+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), >+ REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, >+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), >+ REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, >+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, >+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, 
mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, >+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, >+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, >+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, >+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, >+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, >+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, >+ REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, >+ { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, >+ 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, >+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), >+ 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), >+ 0 }, >+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, >+ 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, >+ { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), >+ 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), >+ 0 }, >+ { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, >+ 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, >+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, >+ REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, >+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, >+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, >+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, >+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, >+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, >+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, >+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), >+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, >+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), >+ REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, >+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), >+ REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, >+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, >+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), >+ REG_FIELD_MASK(SQ_EDC_CNT, 
LDS_D_DED_COUNT) }, >+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), >+ REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, >+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), >+ REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, >+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), >+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, >+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), >+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, >+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), >+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, >+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, >+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), >+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, >+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), >+ 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, >+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, >+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), >+ 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, >+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, >+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), >+ 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, >+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, >+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, >+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, >+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, >+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, >+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO", >+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, >+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), >+ 0 }, >+ { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, >+ { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, >+ { 
"SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, >+ { "SQC_DATA_BANKA_DIRTY_BIT_RAM", >+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, >+ REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, >+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, >+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, >+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, >+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, >+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, >+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), >+ REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, >+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO", >+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, >+ REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), >+ 0 }, >+ { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, >+ { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, >+ { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, >+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, >+ { "SQC_DATA_BANKB_DIRTY_BIT_RAM", >+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, >+ REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, >+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), >+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, >+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), >+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, >+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), >+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, >+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), >+ REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, >+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), >+ REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, >+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, >+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, >+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, >+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, 
IOWR_CMDMEM_SED_COUNT), 0 }, >+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, >+ { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), >+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, >+ { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), >+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, >+ { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), >+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, >+ { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, >+ { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, >+ { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, >+ { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, >+ { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, >+ { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, >+ REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, >+}; >+ >+static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, >+ void *inject_if) >+{ >+ struct ras_inject_if *info = (struct ras_inject_if *)inject_if; >+ int ret; >+ struct ta_ras_trigger_error_input block_info = { 0 }; >+ >+ if (adev->asic_type != CHIP_VEGA20) >+ return -EINVAL; >+ >+ if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) >+ return -EINVAL; >+ >+ if (!ras_gfx_subblocks[info->head.sub_block_index].name) >+ return -EPERM; >+ >+ if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & >+ info->head.type)) { >+ DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", >+ ras_gfx_subblocks[info->head.sub_block_index].name, >+ info->head.type); >+ return -EPERM; >+ } >+ >+ if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & >+ info->head.type)) { >+ DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", >+ ras_gfx_subblocks[info->head.sub_block_index].name, >+ info->head.type); >+ return -EPERM; >+ } >+ >+ block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); >+ block_info.sub_block_index = >+ ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; >+ block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); >+ block_info.address = info->address; >+ block_info.value = info->value; >+ >+ mutex_lock(&adev->grbm_idx_mutex); >+ ret = psp_ras_trigger_error(&adev->psp, &block_info); >+ mutex_unlock(&adev->grbm_idx_mutex); >+ >+ return ret; >+} >+ >+static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, >+ void *ras_error_status) >+{ >+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; >+ uint32_t sec_count, ded_count; >+ uint32_t i; >+ uint32_t reg_value; >+ uint32_t se_id, instance_id; >+ >+ if (adev->asic_type != CHIP_VEGA20) >+ return -EINVAL; >+ >+ err_data->ue_count = 0; >+ err_data->ce_count = 0; >+ >+ mutex_lock(&adev->grbm_idx_mutex); >+ for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { >+ for (instance_id = 0; instance_id < 256; 
instance_id++) { >+ for (i = 0; >+ i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); >+ i++) { >+ if (se_id != 0 && >+ !gfx_ras_edc_regs[i].per_se_instance) >+ continue; >+ if (instance_id >= gfx_ras_edc_regs[i].num_instance) >+ continue; >+ >+ gfx_v9_0_select_se_sh(adev, se_id, 0, >+ instance_id); >+ >+ reg_value = RREG32( >+ adev->reg_offset[gfx_ras_edc_regs[i].ip] >+ [gfx_ras_edc_regs[i].inst] >+ [gfx_ras_edc_regs[i].seg] + >+ gfx_ras_edc_regs[i].reg_offset); >+ sec_count = reg_value & >+ gfx_ras_edc_regs[i].sec_count_mask; >+ ded_count = reg_value & >+ gfx_ras_edc_regs[i].ded_count_mask; >+ if (sec_count) { >+ DRM_INFO( >+ "Instance[%d][%d]: SubBlock %s, SEC %d\n", >+ se_id, instance_id, >+ gfx_ras_edc_regs[i].name, >+ sec_count); >+ err_data->ce_count++; >+ } >+ >+ if (ded_count) { >+ DRM_INFO( >+ "Instance[%d][%d]: SubBlock %s, DED %d\n", >+ se_id, instance_id, >+ gfx_ras_edc_regs[i].name, >+ ded_count); >+ err_data->ue_count++; >+ } >+ } >+ } >+ } >+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); >+ mutex_unlock(&adev->grbm_idx_mutex); >+ >+ return 0; > } > > static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, >@@ -5187,7 +6187,7 @@ > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, >- .vmhub = AMDGPU_GFXHUB, >+ .vmhub = AMDGPU_GFXHUB_0, > .get_rptr = gfx_v9_0_ring_get_rptr_gfx, > .get_wptr = gfx_v9_0_ring_get_wptr_gfx, > .set_wptr = gfx_v9_0_ring_set_wptr_gfx, >@@ -5238,7 +6238,7 @@ > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, >- .vmhub = AMDGPU_GFXHUB, >+ .vmhub = AMDGPU_GFXHUB_0, > .get_rptr = gfx_v9_0_ring_get_rptr_compute, > .get_wptr = gfx_v9_0_ring_get_wptr_compute, > .set_wptr = gfx_v9_0_ring_set_wptr_compute, >@@ -5273,7 +6273,7 @@ > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, >- .vmhub = AMDGPU_GFXHUB, >+ .vmhub = AMDGPU_GFXHUB_0, > .get_rptr = gfx_v9_0_ring_get_rptr_compute, > .get_wptr = gfx_v9_0_ring_get_wptr_compute, > .set_wptr = gfx_v9_0_ring_set_wptr_compute, >@@ -5353,6 +6353,8 @@ > case CHIP_VEGA12: > case CHIP_VEGA20: > case CHIP_RAVEN: >+ case CHIP_ARCTURUS: >+ case CHIP_RENOIR: > adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; > break; > default: >@@ -5370,6 +6372,7 @@ > adev->gds.gds_size = 0x10000; > break; > case CHIP_RAVEN: >+ case CHIP_ARCTURUS: > adev->gds.gds_size = 0x1000; > break; > default: >@@ -5391,6 +6394,9 @@ > else > adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ > break; >+ case CHIP_ARCTURUS: >+ adev->gds.gds_compute_max_wave_id = 0xfff; >+ break; > default: > /* this really depends on the chip */ > adev->gds.gds_compute_max_wave_id = 0x7ff; >@@ -5435,12 +6441,21 @@ > { > int i, j, k, counter, active_cu_number = 0; > u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; >- unsigned disable_masks[4 * 2]; >+ unsigned disable_masks[4 * 4]; > > if (!adev || !cu_info) > return -EINVAL; > >- amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); >+ /* >+ * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs >+ */ >+ if (adev->gfx.config.max_shader_engines * >+ adev->gfx.config.max_sh_per_se > 16) >+ return -EINVAL; >+ >+ amdgpu_gfx_parse_disable_cu(disable_masks, >+ adev->gfx.config.max_shader_engines, >+ adev->gfx.config.max_sh_per_se); > > mutex_lock(&adev->grbm_idx_mutex); > for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { >@@ -5449,11 +6464,23 @@ > ao_bitmap = 0; > counter = 0; > gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); >- if (i < 4 && j < 2) >- 
gfx_v9_0_set_user_cu_inactive_bitmap( >- adev, disable_masks[i * 2 + j]); >+ gfx_v9_0_set_user_cu_inactive_bitmap( >+ adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); > bitmap = gfx_v9_0_get_cu_active_bitmap(adev); >- cu_info->bitmap[i][j] = bitmap; >+ >+ /* >+ * The bitmap(and ao_cu_bitmap) in cu_info structure is >+ * 4x4 size array, and it's usually suitable for Vega >+ * ASICs which has 4*2 SE/SH layout. >+ * But for Arcturus, SE/SH layout is changed to 8*1. >+ * To mostly reduce the impact, we make it compatible >+ * with current bitmap array as below: >+ * SE4,SH0 --> bitmap[0][1] >+ * SE5,SH0 --> bitmap[1][1] >+ * SE6,SH0 --> bitmap[2][1] >+ * SE7,SH0 --> bitmap[3][1] >+ */ >+ cu_info->bitmap[i % 4][j + i / 4] = bitmap; > > for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { > if (bitmap & mask) { >@@ -5466,7 +6493,7 @@ > active_cu_number += counter; > if (i < 2 && j < 2) > ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); >- cu_info->ao_cu_bitmap[i][j] = ao_bitmap; >+ cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; > } > } > gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 2019-08-31 15:01:11.848736167 -0500 >@@ -62,7 +62,7 @@ > struct amdgpu_vmhub *hub; > u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i; > >- bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | >+ bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | > GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | > GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | > GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | >@@ -70,7 +70,7 @@ > GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | > GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; > >- bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | >+ bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | > MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | > MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | > MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | >@@ -81,39 +81,39 @@ > switch (state) { > case AMDGPU_IRQ_STATE_DISABLE: > /* MM HUB */ >- hub = &adev->vmhub[AMDGPU_MMHUB]; >+ hub = &adev->vmhub[AMDGPU_MMHUB_0]; > for (i = 0; i < 16; i++) { > reg = hub->vm_context0_cntl + i; > tmp = RREG32(reg); >- tmp &= ~bits[AMDGPU_MMHUB]; >+ tmp &= ~bits[AMDGPU_MMHUB_0]; > WREG32(reg, tmp); > } > > /* GFX HUB */ >- hub = &adev->vmhub[AMDGPU_GFXHUB]; >+ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; > for (i = 0; i < 16; i++) { > reg = hub->vm_context0_cntl + i; > tmp = RREG32(reg); >- tmp &= ~bits[AMDGPU_GFXHUB]; >+ tmp &= ~bits[AMDGPU_GFXHUB_0]; > WREG32(reg, tmp); > } > break; > case AMDGPU_IRQ_STATE_ENABLE: > /* MM HUB */ >- hub = &adev->vmhub[AMDGPU_MMHUB]; >+ hub = &adev->vmhub[AMDGPU_MMHUB_0]; > for (i = 0; i < 16; i++) { > reg = hub->vm_context0_cntl + i; > tmp = RREG32(reg); >- tmp |= bits[AMDGPU_MMHUB]; >+ tmp |= bits[AMDGPU_MMHUB_0]; > WREG32(reg, tmp); > } > > /* GFX HUB */ >- hub = &adev->vmhub[AMDGPU_GFXHUB]; >+ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; > for (i = 0; i < 16; i++) { > reg = 
hub->vm_context0_cntl + i; > tmp = RREG32(reg); >- tmp |= bits[AMDGPU_GFXHUB]; >+ tmp |= bits[AMDGPU_GFXHUB_0]; > WREG32(reg, tmp); > } > break; >@@ -136,22 +136,53 @@ > addr |= ((u64)entry->src_data[1] & 0xf) << 44; > > if (!amdgpu_sriov_vf(adev)) { >+ /* >+ * Issue a dummy read to wait for the status register to >+ * be updated to avoid reading an incorrect value due to >+ * the new fast GRBM interface. >+ */ >+ if (entry->vmid_src == AMDGPU_GFXHUB_0) >+ RREG32(hub->vm_l2_pro_fault_status); >+ > status = RREG32(hub->vm_l2_pro_fault_status); > WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); > } > > if (printk_ratelimit()) { >+ struct amdgpu_task_info task_info; >+ >+ memset(&task_info, 0, sizeof(struct amdgpu_task_info)); >+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); >+ > dev_err(adev->dev, >- "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", >+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " >+ "for process %s pid %d thread %s pid %d)\n", > entry->vmid_src ? "mmhub" : "gfxhub", > entry->src_id, entry->ring_id, entry->vmid, >- entry->pasid); >- dev_err(adev->dev, " at page 0x%016llx from %d\n", >+ entry->pasid, task_info.process_name, task_info.tgid, >+ task_info.task_name, task_info.pid); >+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", > addr, entry->client_id); >- if (!amdgpu_sriov_vf(adev)) >+ if (!amdgpu_sriov_vf(adev)) { > dev_err(adev->dev, >- "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", >+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", > status); >+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", >+ REG_GET_FIELD(status, >+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); >+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", >+ REG_GET_FIELD(status, >+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); >+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", >+ REG_GET_FIELD(status, >+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); >+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", >+ REG_GET_FIELD(status, >+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); >+ dev_err(adev->dev, "\t RW: 0x%lx\n", >+ REG_GET_FIELD(status, >+ GCVM_L2_PROTECTION_FAULT_STATUS, RW)); >+ } > } > > return 0; >@@ -206,6 +237,13 @@ > > WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); > >+ /* >+ * Issue a dummy read to wait for the ACK register to be cleared >+ * to avoid a false ACK due to the new fast GRBM interface. >+ */ >+ if (vmhub == AMDGPU_GFXHUB_0) >+ RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); >+ > /* Wait for ACK with a delay.*/ > for (i = 0; i < adev->usec_timeout; i++) { > tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); >@@ -230,8 +268,8 @@ > * > * Flush the TLB for the requested page table. 
> */ >-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, >- uint32_t vmid, uint32_t flush_type) >+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >+ uint32_t vmhub, uint32_t flush_type) > { > struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > struct dma_fence *fence; >@@ -244,11 +282,18 @@ > > mutex_lock(&adev->mman.gtt_window_lock); > >- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0); >+ if (vmhub == AMDGPU_MMHUB_0) { >+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); >+ mutex_unlock(&adev->mman.gtt_window_lock); >+ return; >+ } >+ >+ BUG_ON(vmhub != AMDGPU_GFXHUB_0); >+ > if (!adev->mman.buffer_funcs_enabled || > !adev->ib_pool_ready || > adev->in_gpu_reset) { >- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0); >+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); > mutex_unlock(&adev->mman.gtt_window_lock); > return; > } >@@ -313,7 +358,7 @@ > struct amdgpu_device *adev = ring->adev; > uint32_t reg; > >- if (ring->funcs->vmhub == AMDGPU_GFXHUB) >+ if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) > reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; > else > reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; >@@ -524,6 +569,8 @@ > if (amdgpu_gart_size == -1) { > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > default: > adev->gmc.gart_size = 512ULL << 20; > break; >@@ -590,7 +637,6 @@ > static int gmc_v10_0_sw_init(void *handle) > { > int r; >- int dma_bits; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > gfxhub_v2_0_init(adev); >@@ -601,9 +647,12 @@ > adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: >+ adev->num_vmhubs = 2; > /* > * To fulfill 4-level page support, >- * vm size is 256TB (48bit), maximum size of Navi10, >+ * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12, > * block size 512 (9bit) > */ > amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); >@@ -637,26 +686,10 @@ > else > adev->gmc.stolen_size = 9 * 1024 *1024; > >- /* >- * Set DMA mask + need_dma32 flags. >- * PCIE - can handle 44-bits. >- * IGP - can handle 44-bits >- * PCI - dma32 for legacy pci gart, 44 bits on navi10 >- */ >- adev->need_dma32 = false; >- dma_bits = adev->need_dma32 ? 
32 : 44; >- >- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); > if (r) { >- adev->need_dma32 = true; >- dma_bits = 32; > printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); >- } >- >- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >- if (r) { >- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); >- printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); >+ return r; > } > > r = gmc_v10_0_mc_init(adev); >@@ -680,8 +713,8 @@ > * amdgpu graphics/compute will use VMIDs 1-7 > * amdkfd will use VMIDs 8-15 > */ >- adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; >- adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; >+ adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; >+ adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; > > amdgpu_vm_manager_init(adev); > >@@ -717,6 +750,8 @@ > { > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > break; > default: > break; >@@ -766,7 +801,8 @@ > > gfxhub_v2_0_set_fault_enable_default(adev, value); > mmhub_v2_0_set_fault_enable_default(adev, value); >- gmc_v10_0_flush_gpu_tlb(adev, 0, 0); >+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); >+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); > > DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 2019-08-31 15:01:11.848736167 -0500 >@@ -362,8 +362,8 @@ > return 0; > } > >-static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, >- uint32_t vmid, uint32_t flush_type) >+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >+ uint32_t vmhub, uint32_t flush_type) > { > WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); > } >@@ -571,7 +571,7 @@ > else > gmc_v6_0_set_fault_enable_default(adev, true); > >- gmc_v6_0_flush_gpu_tlb(adev, 0, 0); >+ gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0); > dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), > (unsigned long long)table_addr); >@@ -839,9 +839,10 @@ > static int gmc_v6_0_sw_init(void *handle) > { > int r; >- int dma_bits; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >+ adev->num_vmhubs = 1; >+ > if (adev->flags & AMD_IS_APU) { > adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; > } else { >@@ -862,20 +863,12 @@ > > adev->gmc.mc_mask = 0xffffffffffULL; > >- adev->need_dma32 = false; >- dma_bits = adev->need_dma32 ? 
32 : 40; >- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); > if (r) { >- adev->need_dma32 = true; >- dma_bits = 32; > dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); >+ return r; > } >- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >- if (r) { >- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); >- dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n"); >- } >- adev->need_swiotlb = drm_need_swiotlb(dma_bits); >+ adev->need_swiotlb = drm_need_swiotlb(44); > > r = gmc_v6_0_init_microcode(adev); > if (r) { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 2019-08-31 15:01:11.848736167 -0500 >@@ -433,8 +433,8 @@ > * > * Flush the TLB for the requested page table (CIK). > */ >-static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, >- uint32_t vmid, uint32_t flush_type) >+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >+ uint32_t vmhub, uint32_t flush_type) > { > /* bits 0-15 are the VM contexts0-15 */ > WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >@@ -677,7 +677,7 @@ > WREG32(mmCHUB_CONTROL, tmp); > } > >- gmc_v7_0_flush_gpu_tlb(adev, 0, 0); >+ gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0); > DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), > (unsigned long long)table_addr); >@@ -959,9 +959,10 @@ > static int gmc_v7_0_sw_init(void *handle) > { > int r; >- int dma_bits; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >+ adev->num_vmhubs = 1; >+ > if (adev->flags & AMD_IS_APU) { > adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; > } else { >@@ -990,25 +991,12 @@ > */ > adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ > >- /* set DMA mask + need_dma32 flags. >- * PCIE - can handle 40-bits. >- * IGP - can handle 40-bits >- * PCI - dma32 for legacy pci gart, 40 bits on newer asics >- */ >- adev->need_dma32 = false; >- dma_bits = adev->need_dma32 ? 32 : 40; >- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); > if (r) { >- adev->need_dma32 = true; >- dma_bits = 32; > pr_warn("amdgpu: No suitable DMA available\n"); >+ return r; > } >- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >- if (r) { >- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); >- pr_warn("amdgpu: No coherent DMA available\n"); >- } >- adev->need_swiotlb = drm_need_swiotlb(dma_bits); >+ adev->need_swiotlb = drm_need_swiotlb(40); > > r = gmc_v7_0_init_microcode(adev); > if (r) { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 2019-08-31 15:01:11.848736167 -0500 >@@ -635,8 +635,8 @@ > * > * Flush the TLB for the requested page table (VI). 
> */ >-static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, >- uint32_t vmid, uint32_t flush_type) >+static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >+ uint32_t vmhub, uint32_t flush_type) > { > /* bits 0-15 are the VM contexts0-15 */ > WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >@@ -921,7 +921,7 @@ > else > gmc_v8_0_set_fault_enable_default(adev, true); > >- gmc_v8_0_flush_gpu_tlb(adev, 0, 0); >+ gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0); > DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), > (unsigned long long)table_addr); >@@ -1079,9 +1079,10 @@ > static int gmc_v8_0_sw_init(void *handle) > { > int r; >- int dma_bits; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >+ adev->num_vmhubs = 1; >+ > if (adev->flags & AMD_IS_APU) { > adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; > } else { >@@ -1116,25 +1117,12 @@ > */ > adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ > >- /* set DMA mask + need_dma32 flags. >- * PCIE - can handle 40-bits. >- * IGP - can handle 40-bits >- * PCI - dma32 for legacy pci gart, 40 bits on newer asics >- */ >- adev->need_dma32 = false; >- dma_bits = adev->need_dma32 ? 32 : 40; >- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); > if (r) { >- adev->need_dma32 = true; >- dma_bits = 32; > pr_warn("amdgpu: No suitable DMA available\n"); >+ return r; > } >- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >- if (r) { >- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); >- pr_warn("amdgpu: No coherent DMA available\n"); >- } >- adev->need_swiotlb = drm_need_swiotlb(dma_bits); >+ adev->need_swiotlb = drm_need_swiotlb(40); > > r = gmc_v8_0_init_microcode(adev); > if (r) { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 2019-08-31 15:01:11.848736167 -0500 >@@ -47,7 +47,10 @@ > > #include "gfxhub_v1_0.h" > #include "mmhub_v1_0.h" >+#include "athub_v1_0.h" > #include "gfxhub_v1_1.h" >+#include "mmhub_v9_4.h" >+#include "umc_v6_1.h" > > #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" > >@@ -241,18 +244,30 @@ > } > > static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, >+ struct ras_err_data *err_data, > struct amdgpu_iv_entry *entry) > { > kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); >- amdgpu_ras_reset_gpu(adev, 0); >- return AMDGPU_RAS_UE; >+ if (adev->umc.funcs->query_ras_error_count) >+ adev->umc.funcs->query_ras_error_count(adev, err_data); >+ /* umc query_ras_error_address is also responsible for clearing >+ * error status >+ */ >+ if (adev->umc.funcs->query_ras_error_address) >+ adev->umc.funcs->query_ras_error_address(adev, err_data); >+ >+ /* only uncorrectable error needs gpu reset */ >+ if (err_data->ue_count) >+ amdgpu_ras_reset_gpu(adev, 0); >+ >+ return AMDGPU_RAS_SUCCESS; > } > > static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev, > struct amdgpu_irq_src *source, > struct amdgpu_iv_entry *entry) > { >- struct ras_common_if *ras_if = adev->gmc.ras_if; >+ struct ras_common_if *ras_if = adev->gmc.umc_ras_if; > struct ras_dispatch_if ih_data = { > .entry = entry, > }; >@@ -284,7 +299,7 @@ > > switch (state) { > case AMDGPU_IRQ_STATE_DISABLE: >- for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { >+ for (j = 0; j < 
adev->num_vmhubs; j++) { > hub = &adev->vmhub[j]; > for (i = 0; i < 16; i++) { > reg = hub->vm_context0_cntl + i; >@@ -295,7 +310,7 @@ > } > break; > case AMDGPU_IRQ_STATE_ENABLE: >- for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { >+ for (j = 0; j < adev->num_vmhubs; j++) { > hub = &adev->vmhub[j]; > for (i = 0; i < 16; i++) { > reg = hub->vm_context0_cntl + i; >@@ -315,10 +330,11 @@ > struct amdgpu_irq_src *source, > struct amdgpu_iv_entry *entry) > { >- struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; >+ struct amdgpu_vmhub *hub; > bool retry_fault = !!(entry->src_data[1] & 0x80); > uint32_t status = 0; > u64 addr; >+ char hub_name[10]; > > addr = (u64)entry->src_data[0] << 12; > addr |= ((u64)entry->src_data[1] & 0xf) << 44; >@@ -327,8 +343,27 @@ > entry->timestamp)) > return 1; /* This also prevents sending it to KFD */ > >+ if (entry->client_id == SOC15_IH_CLIENTID_VMC) { >+ snprintf(hub_name, sizeof(hub_name), "mmhub0"); >+ hub = &adev->vmhub[AMDGPU_MMHUB_0]; >+ } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { >+ snprintf(hub_name, sizeof(hub_name), "mmhub1"); >+ hub = &adev->vmhub[AMDGPU_MMHUB_1]; >+ } else { >+ snprintf(hub_name, sizeof(hub_name), "gfxhub0"); >+ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; >+ } >+ > /* If it's the first fault for this address, process it normally */ > if (!amdgpu_sriov_vf(adev)) { >+ /* >+ * Issue a dummy read to wait for the status register to >+ * be updated to avoid reading an incorrect value due to >+ * the new fast GRBM interface. >+ */ >+ if (entry->vmid_src == AMDGPU_GFXHUB_0) >+ RREG32(hub->vm_l2_pro_fault_status); >+ > status = RREG32(hub->vm_l2_pro_fault_status); > WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); > } >@@ -342,17 +377,33 @@ > dev_err(adev->dev, > "[%s] %s page fault (src_id:%u ring:%u vmid:%u " > "pasid:%u, for process %s pid %d thread %s pid %d)\n", >- entry->vmid_src ? "mmhub" : "gfxhub", >- retry_fault ? "retry" : "no-retry", >+ hub_name, retry_fault ? "retry" : "no-retry", > entry->src_id, entry->ring_id, entry->vmid, > entry->pasid, task_info.process_name, task_info.tgid, > task_info.task_name, task_info.pid); >- dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n", >+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", > addr, entry->client_id); >- if (!amdgpu_sriov_vf(adev)) >+ if (!amdgpu_sriov_vf(adev)) { > dev_err(adev->dev, > "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", > status); >+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", >+ REG_GET_FIELD(status, >+ VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); >+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", >+ REG_GET_FIELD(status, >+ VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); >+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", >+ REG_GET_FIELD(status, >+ VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); >+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", >+ REG_GET_FIELD(status, >+ VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); >+ dev_err(adev->dev, "\t RW: 0x%lx\n", >+ REG_GET_FIELD(status, >+ VM_L2_PROTECTION_FAULT_STATUS, RW)); >+ >+ } > } > > return 0; >@@ -413,44 +464,53 @@ > * > * Flush the TLB for the requested page table using certain type. 
> */ >-static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, >- uint32_t vmid, uint32_t flush_type) >+static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >+ uint32_t vmhub, uint32_t flush_type) > { > const unsigned eng = 17; >- unsigned i, j; >+ u32 j, tmp; >+ struct amdgpu_vmhub *hub; > >- for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { >- struct amdgpu_vmhub *hub = &adev->vmhub[i]; >- u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); >+ BUG_ON(vmhub >= adev->num_vmhubs); > >- /* This is necessary for a HW workaround under SRIOV as well >- * as GFXOFF under bare metal >- */ >- if (adev->gfx.kiq.ring.sched.ready && >- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && >- !adev->in_gpu_reset) { >- uint32_t req = hub->vm_inv_eng0_req + eng; >- uint32_t ack = hub->vm_inv_eng0_ack + eng; >- >- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, >- 1 << vmid); >- continue; >- } >+ hub = &adev->vmhub[vmhub]; >+ tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); > >- spin_lock(&adev->gmc.invalidate_lock); >- WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); >- for (j = 0; j < adev->usec_timeout; j++) { >- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); >- if (tmp & (1 << vmid)) >- break; >- udelay(1); >- } >- spin_unlock(&adev->gmc.invalidate_lock); >- if (j < adev->usec_timeout) >- continue; >+ /* This is necessary for a HW workaround under SRIOV as well >+ * as GFXOFF under bare metal >+ */ >+ if (adev->gfx.kiq.ring.sched.ready && >+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && >+ !adev->in_gpu_reset) { >+ uint32_t req = hub->vm_inv_eng0_req + eng; >+ uint32_t ack = hub->vm_inv_eng0_ack + eng; > >- DRM_ERROR("Timeout waiting for VM flush ACK!\n"); >+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, >+ 1 << vmid); >+ return; > } >+ >+ spin_lock(&adev->gmc.invalidate_lock); >+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); >+ >+ /* >+ * Issue a dummy read to wait for the ACK register to be cleared >+ * to avoid a false ACK due to the new fast GRBM interface. >+ */ >+ if (vmhub == AMDGPU_GFXHUB_0) >+ RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); >+ >+ for (j = 0; j < adev->usec_timeout; j++) { >+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); >+ if (tmp & (1 << vmid)) >+ break; >+ udelay(1); >+ } >+ spin_unlock(&adev->gmc.invalidate_lock); >+ if (j < adev->usec_timeout) >+ return; >+ >+ DRM_ERROR("Timeout waiting for VM flush ACK!\n"); > } > > static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >@@ -480,7 +540,11 @@ > struct amdgpu_device *adev = ring->adev; > uint32_t reg; > >- if (ring->funcs->vmhub == AMDGPU_GFXHUB) >+ /* Do nothing because there's no lut register for mmhub1. 
*/ >+ if (ring->funcs->vmhub == AMDGPU_MMHUB_1) >+ return; >+ >+ if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) > reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; > else > reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; >@@ -597,12 +661,41 @@ > adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; > } > >+static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) >+{ >+ switch (adev->asic_type) { >+ case CHIP_VEGA20: >+ adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; >+ adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; >+ adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; >+ adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET; >+ adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; >+ adev->umc.funcs = &umc_v6_1_funcs; >+ break; >+ default: >+ break; >+ } >+} >+ >+static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) >+{ >+ switch (adev->asic_type) { >+ case CHIP_VEGA20: >+ adev->mmhub_funcs = &mmhub_v1_0_funcs; >+ break; >+ default: >+ break; >+ } >+} >+ > static int gmc_v9_0_early_init(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > gmc_v9_0_set_gmc_funcs(adev); > gmc_v9_0_set_irq_funcs(adev); >+ gmc_v9_0_set_umc_funcs(adev); >+ gmc_v9_0_set_mmhub_funcs(adev); > > adev->gmc.shared_aperture_start = 0x2000000000000000ULL; > adev->gmc.shared_aperture_end = >@@ -629,6 +722,8 @@ > switch (adev->asic_type) { > case CHIP_VEGA10: > case CHIP_RAVEN: >+ case CHIP_ARCTURUS: >+ case CHIP_RENOIR: > return true; > case CHIP_VEGA12: > case CHIP_VEGA20: >@@ -641,7 +736,8 @@ > { > struct amdgpu_ring *ring; > unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = >- {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP}; >+ {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, >+ GFXHUB_FREE_VM_INV_ENGS_BITMAP}; > unsigned i; > unsigned vmhub, inv_eng; > >@@ -666,29 +762,28 @@ > return 0; > } > >-static int gmc_v9_0_ecc_late_init(void *handle) >+static int gmc_v9_0_ecc_ras_block_late_init(void *handle, >+ struct ras_fs_if *fs_info, struct ras_common_if *ras_block) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; >- struct ras_common_if **ras_if = &adev->gmc.ras_if; >+ struct ras_common_if **ras_if = NULL; > struct ras_ih_if ih_info = { > .cb = gmc_v9_0_process_ras_data_cb, > }; >- struct ras_fs_if fs_info = { >- .sysfs_name = "umc_err_count", >- .debugfs_name = "umc_err_inject", >- }; >- struct ras_common_if ras_block = { >- .block = AMDGPU_RAS_BLOCK__UMC, >- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, >- .sub_block_index = 0, >- .name = "umc", >- }; > int r; > >- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) { >- amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); >+ if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) >+ ras_if = &adev->gmc.umc_ras_if; >+ else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB) >+ ras_if = &adev->gmc.mmhub_ras_if; >+ else >+ BUG(); >+ >+ if (!amdgpu_ras_is_supported(adev, ras_block->block)) { >+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); > return 0; > } >+ > /* handle resume path. */ > if (*ras_if) { > /* resend ras TA enable cmd during resume. >@@ -700,7 +795,7 @@ > if (r == -EAGAIN) { > /* request a gpu reset. will run again. */ > amdgpu_ras_request_reset_on_boot(adev, >- AMDGPU_RAS_BLOCK__UMC); >+ ras_block->block); > return 0; > } > /* fail to enable ras, cleanup all. 
*/ >@@ -714,41 +809,46 @@ > if (!*ras_if) > return -ENOMEM; > >- **ras_if = ras_block; >+ **ras_if = *ras_block; > > r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); > if (r) { > if (r == -EAGAIN) { > amdgpu_ras_request_reset_on_boot(adev, >- AMDGPU_RAS_BLOCK__UMC); >+ ras_block->block); > r = 0; > } > goto feature; > } > > ih_info.head = **ras_if; >- fs_info.head = **ras_if; >+ fs_info->head = **ras_if; > >- r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); >- if (r) >- goto interrupt; >+ if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { >+ r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); >+ if (r) >+ goto interrupt; >+ } > >- amdgpu_ras_debugfs_create(adev, &fs_info); >+ amdgpu_ras_debugfs_create(adev, fs_info); > >- r = amdgpu_ras_sysfs_create(adev, &fs_info); >+ r = amdgpu_ras_sysfs_create(adev, fs_info); > if (r) > goto sysfs; > resume: >- r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); >- if (r) >- goto irq; >+ if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { >+ r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); >+ if (r) >+ goto irq; >+ } > > return 0; > irq: > amdgpu_ras_sysfs_remove(adev, *ras_if); > sysfs: > amdgpu_ras_debugfs_remove(adev, *ras_if); >- amdgpu_ras_interrupt_remove_handler(adev, &ih_info); >+ if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) >+ amdgpu_ras_interrupt_remove_handler(adev, &ih_info); > interrupt: > amdgpu_ras_feature_enable(adev, *ras_if, 0); > feature: >@@ -757,6 +857,40 @@ > return r; > } > >+static int gmc_v9_0_ecc_late_init(void *handle) >+{ >+ int r; >+ >+ struct ras_fs_if umc_fs_info = { >+ .sysfs_name = "umc_err_count", >+ .debugfs_name = "umc_err_inject", >+ }; >+ struct ras_common_if umc_ras_block = { >+ .block = AMDGPU_RAS_BLOCK__UMC, >+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, >+ .sub_block_index = 0, >+ .name = "umc", >+ }; >+ struct ras_fs_if mmhub_fs_info = { >+ .sysfs_name = "mmhub_err_count", >+ .debugfs_name = "mmhub_err_inject", >+ }; >+ struct ras_common_if mmhub_ras_block = { >+ .block = AMDGPU_RAS_BLOCK__MMHUB, >+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, >+ .sub_block_index = 0, >+ .name = "mmhub", >+ }; >+ >+ r = gmc_v9_0_ecc_ras_block_late_init(handle, >+ &umc_fs_info, &umc_ras_block); >+ if (r) >+ return r; >+ >+ r = gmc_v9_0_ecc_ras_block_late_init(handle, >+ &mmhub_fs_info, &mmhub_ras_block); >+ return r; >+} > > static int gmc_v9_0_late_init(void *handle) > { >@@ -806,14 +940,17 @@ > struct amdgpu_gmc *mc) > { > u64 base = 0; >- if (!amdgpu_sriov_vf(adev)) >+ >+ if (adev->asic_type == CHIP_ARCTURUS) >+ base = mmhub_v9_4_get_fb_location(adev); >+ else if (!amdgpu_sriov_vf(adev)) > base = mmhub_v1_0_get_fb_location(adev); >+ > /* add the xgmi offset of the physical node */ > base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; > amdgpu_gmc_vram_location(adev, mc, base); > amdgpu_gmc_gart_location(adev, mc); >- if (!amdgpu_sriov_vf(adev)) >- amdgpu_gmc_agp_location(adev, mc); >+ amdgpu_gmc_agp_location(adev, mc); > /* base offset of vram pages */ > adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); > >@@ -887,10 +1024,12 @@ > case CHIP_VEGA10: /* all engines support GPUVM */ > case CHIP_VEGA12: /* all engines support GPUVM */ > case CHIP_VEGA20: >+ case CHIP_ARCTURUS: > default: > adev->gmc.gart_size = 512ULL << 20; > break; > case CHIP_RAVEN: /* DCE SG support */ >+ case CHIP_RENOIR: > adev->gmc.gart_size = 1024ULL << 20; > break; > } >@@ -923,7 +1062,7 @@ > > static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) > { >- u32 d1vga_control = 
RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); >+ u32 d1vga_control; > unsigned size; > > /* >@@ -933,6 +1072,7 @@ > if (gmc_v9_0_keep_stolen_memory(adev)) > return 9 * 1024 * 1024; > >+ d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); > if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { > size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ > } else { >@@ -940,6 +1080,7 @@ > > switch (adev->asic_type) { > case CHIP_RAVEN: >+ case CHIP_RENOIR: > viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); > size = (REG_GET_FIELD(viewport, > HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * >@@ -968,17 +1109,21 @@ > static int gmc_v9_0_sw_init(void *handle) > { > int r; >- int dma_bits; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > gfxhub_v1_0_init(adev); >- mmhub_v1_0_init(adev); >+ if (adev->asic_type == CHIP_ARCTURUS) >+ mmhub_v9_4_init(adev); >+ else >+ mmhub_v1_0_init(adev); > > spin_lock_init(&adev->gmc.invalidate_lock); > > adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); > switch (adev->asic_type) { > case CHIP_RAVEN: >+ adev->num_vmhubs = 2; >+ > if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { > amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); > } else { >@@ -991,6 +1136,10 @@ > case CHIP_VEGA10: > case CHIP_VEGA12: > case CHIP_VEGA20: >+ case CHIP_RENOIR: >+ adev->num_vmhubs = 2; >+ >+ > /* > * To fulfill 4-level page support, > * vm size is 256TB (48bit), maximum size of Vega10, >@@ -1002,6 +1151,12 @@ > else > amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); > break; >+ case CHIP_ARCTURUS: >+ adev->num_vmhubs = 3; >+ >+ /* Keep the vm size same with Vega20 */ >+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); >+ break; > default: > break; > } >@@ -1012,6 +1167,13 @@ > if (r) > return r; > >+ if (adev->asic_type == CHIP_ARCTURUS) { >+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT, >+ &adev->gmc.vm_fault); >+ if (r) >+ return r; >+ } >+ > r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, > &adev->gmc.vm_fault); > >@@ -1030,25 +1192,12 @@ > */ > adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ > >- /* set DMA mask + need_dma32 flags. >- * PCIE - can handle 44-bits. >- * IGP - can handle 44-bits >- * PCI - dma32 for legacy pci gart, 44 bits on vega10 >- */ >- adev->need_dma32 = false; >- dma_bits = adev->need_dma32 ? 
32 : 44; >- r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); > if (r) { >- adev->need_dma32 = true; >- dma_bits = 32; > printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); >+ return r; > } >- r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); >- if (r) { >- pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); >- printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); >- } >- adev->need_swiotlb = drm_need_swiotlb(dma_bits); >+ adev->need_swiotlb = drm_need_swiotlb(44); > > if (adev->gmc.xgmi.supported) { > r = gfxhub_v1_1_get_xgmi_info(adev); >@@ -1077,8 +1226,9 @@ > * amdgpu graphics/compute will use VMIDs 1-7 > * amdkfd will use VMIDs 8-15 > */ >- adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; >- adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; >+ adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; >+ adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; >+ adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS; > > amdgpu_vm_manager_init(adev); > >@@ -1088,28 +1238,40 @@ > static int gmc_v9_0_sw_fini(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ void *stolen_vga_buf; > > if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && >- adev->gmc.ras_if) { >- struct ras_common_if *ras_if = adev->gmc.ras_if; >+ adev->gmc.umc_ras_if) { >+ struct ras_common_if *ras_if = adev->gmc.umc_ras_if; > struct ras_ih_if ih_info = { > .head = *ras_if, > }; > >- /*remove fs first*/ >+ /* remove fs first */ > amdgpu_ras_debugfs_remove(adev, ras_if); > amdgpu_ras_sysfs_remove(adev, ras_if); >- /*remove the IH*/ >+ /* remove the IH */ > amdgpu_ras_interrupt_remove_handler(adev, &ih_info); > amdgpu_ras_feature_enable(adev, ras_if, 0); > kfree(ras_if); > } > >+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && >+ adev->gmc.mmhub_ras_if) { >+ struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if; >+ >+ /* remove fs and disable ras feature */ >+ amdgpu_ras_debugfs_remove(adev, ras_if); >+ amdgpu_ras_sysfs_remove(adev, ras_if); >+ amdgpu_ras_feature_enable(adev, ras_if, 0); >+ kfree(ras_if); >+ } >+ > amdgpu_gem_force_release(adev); > amdgpu_vm_manager_fini(adev); > > if (gmc_v9_0_keep_stolen_memory(adev)) >- amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); >+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); > > amdgpu_gart_table_vram_free(adev); > amdgpu_bo_fini(adev); >@@ -1123,7 +1285,7 @@ > > switch (adev->asic_type) { > case CHIP_VEGA10: >- if (amdgpu_virt_support_skip_setting(adev)) >+ if (amdgpu_sriov_vf(adev)) > break; > /* fall through */ > case CHIP_VEGA20: >@@ -1137,6 +1299,7 @@ > case CHIP_VEGA12: > break; > case CHIP_RAVEN: >+ /* TODO for renoir */ > soc15_program_register_sequence(adev, > golden_settings_athub_1_0_0, > ARRAY_SIZE(golden_settings_athub_1_0_0)); >@@ -1153,7 +1316,7 @@ > */ > static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) > { >- int r; >+ int r, i; > bool value; > u32 tmp; > >@@ -1171,6 +1334,7 @@ > > switch (adev->asic_type) { > case CHIP_RAVEN: >+ /* TODO for renoir */ > mmhub_v1_0_update_power_gating(adev, true); > break; > default: >@@ -1181,7 +1345,10 @@ > if (r) > return r; > >- r = mmhub_v1_0_gart_enable(adev); >+ if (adev->asic_type == CHIP_ARCTURUS) >+ r = mmhub_v9_4_gart_enable(adev); >+ else >+ r = mmhub_v1_0_gart_enable(adev); > if (r) > return r; > >@@ -1202,8 +1369,13 @@ > 
value = true; > > gfxhub_v1_0_set_fault_enable_default(adev, value); >- mmhub_v1_0_set_fault_enable_default(adev, value); >- gmc_v9_0_flush_gpu_tlb(adev, 0, 0); >+ if (adev->asic_type == CHIP_ARCTURUS) >+ mmhub_v9_4_set_fault_enable_default(adev, value); >+ else >+ mmhub_v1_0_set_fault_enable_default(adev, value); >+ >+ for (i = 0; i < adev->num_vmhubs; ++i) >+ gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); > > DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), >@@ -1243,7 +1415,10 @@ > static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) > { > gfxhub_v1_0_gart_disable(adev); >- mmhub_v1_0_gart_disable(adev); >+ if (adev->asic_type == CHIP_ARCTURUS) >+ mmhub_v9_4_gart_disable(adev); >+ else >+ mmhub_v1_0_gart_disable(adev); > amdgpu_gart_table_vram_unpin(adev); > } > >@@ -1308,14 +1483,26 @@ > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >- return mmhub_v1_0_set_clockgating(adev, state); >+ if (adev->asic_type == CHIP_ARCTURUS) >+ mmhub_v9_4_set_clockgating(adev, state); >+ else >+ mmhub_v1_0_set_clockgating(adev, state); >+ >+ athub_v1_0_set_clockgating(adev, state); >+ >+ return 0; > } > > static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >- mmhub_v1_0_get_clockgating(adev, flags); >+ if (adev->asic_type == CHIP_ARCTURUS) >+ mmhub_v9_4_get_clockgating(adev, flags); >+ else >+ mmhub_v1_0_get_clockgating(adev, flags); >+ >+ athub_v1_0_get_clockgating(adev, flags); > } > > static int gmc_v9_0_set_powergating_state(void *handle, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 2019-08-31 15:01:11.848736167 -0500 >@@ -37,4 +37,11 @@ > extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; > extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; > >+/* amdgpu_amdkfd*.c */ >+void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, >+ uint64_t value); >+void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, >+ uint64_t value); >+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, >+ uint32_t vmid, uint64_t value); > #endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/Makefile linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/Makefile >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/Makefile 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/Makefile 2019-08-31 15:01:11.838736166 -0500 >@@ -54,7 +54,7 @@ > amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ > amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ > amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ >- amdgpu_vm_sdma.o amdgpu_discovery.o >+ amdgpu_vm_sdma.o amdgpu_pmu.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o > > amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o > >@@ -66,7 +66,8 @@ > > amdgpu-y += \ > vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ >- vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o >+ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ >+ arct_reg_init.o navi12_reg_init.o > > # add DF block > amdgpu-y += \ >@@ -77,9 +78,13 @@ > amdgpu-y += \ > gmc_v7_0.o \ > gmc_v8_0.o 
\ >- gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \ >+ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ > gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o > >+# add UMC block >+amdgpu-y += \ >+ umc_v6_1.o >+ > # add IH block > amdgpu-y += \ > amdgpu_irq.o \ >@@ -95,7 +100,8 @@ > amdgpu_psp.o \ > psp_v3_1.o \ > psp_v10_0.o \ >- psp_v11_0.o >+ psp_v11_0.o \ >+ psp_v12_0.o > > # add SMC block > amdgpu-y += \ >@@ -144,10 +150,12 @@ > amdgpu-y += \ > amdgpu_vcn.o \ > vcn_v1_0.o \ >- vcn_v2_0.o >+ vcn_v2_0.o \ >+ vcn_v2_5.o > > # add ATHUB block > amdgpu-y += \ >+ athub_v1_0.o \ > athub_v2_0.o > > # add amdkfd interfaces >@@ -162,6 +170,7 @@ > amdgpu_amdkfd_gpuvm.o \ > amdgpu_amdkfd_gfx_v8.o \ > amdgpu_amdkfd_gfx_v9.o \ >+ amdgpu_amdkfd_arcturus.o \ > amdgpu_amdkfd_gfx_v10.o > > ifneq ($(CONFIG_DRM_AMDGPU_CIK),) >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 2019-08-31 15:01:11.848736167 -0500 >@@ -21,13 +21,13 @@ > * > */ > #include "amdgpu.h" >+#include "amdgpu_ras.h" > #include "mmhub_v1_0.h" > > #include "mmhub/mmhub_1_0_offset.h" > #include "mmhub/mmhub_1_0_sh_mask.h" > #include "mmhub/mmhub_1_0_default.h" >-#include "athub/athub_1_0_offset.h" >-#include "athub/athub_1_0_sh_mask.h" >+#include "mmhub/mmhub_9_4_0_offset.h" > #include "vega10_enum.h" > > #include "soc15_common.h" >@@ -35,6 +35,9 @@ > #define mmDAGB0_CNTL_MISC2_RV 0x008f > #define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 > >+#define EA_EDC_CNT_MASK 0x3 >+#define EA_EDC_CNT_SHIFT 0x2 >+ > u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) > { > u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); >@@ -111,7 +114,7 @@ > WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, > max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); > >- if (amdgpu_virt_support_skip_setting(adev)) >+ if (amdgpu_sriov_vf(adev)) > return; > > /* Set default page address. 
*/ >@@ -159,7 +162,7 @@ > { > uint32_t tmp; > >- if (amdgpu_virt_support_skip_setting(adev)) >+ if (amdgpu_sriov_vf(adev)) > return; > > /* Setup L2 cache */ >@@ -208,7 +211,7 @@ > > static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) > { >- if (amdgpu_virt_support_skip_setting(adev)) >+ if (amdgpu_sriov_vf(adev)) > return; > > WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, >@@ -348,7 +351,7 @@ > 0); > WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); > >- if (!amdgpu_virt_support_skip_setting(adev)) { >+ if (!amdgpu_sriov_vf(adev)) { > /* Setup L2 cache */ > tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); > tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); >@@ -367,7 +370,7 @@ > { > u32 tmp; > >- if (amdgpu_virt_support_skip_setting(adev)) >+ if (amdgpu_sriov_vf(adev)) > return; > > tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); >@@ -407,7 +410,7 @@ > > void mmhub_v1_0_init(struct amdgpu_device *adev) > { >- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; >+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; > > hub->ctx0_ptb_addr_lo32 = > SOC15_REG_OFFSET(MMHUB, 0, >@@ -491,22 +494,6 @@ > WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2); > } > >-static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, >- bool enable) >-{ >- uint32_t def, data; >- >- def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); >- >- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) >- data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; >- else >- data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; >- >- if (def != data) >- WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); >-} >- > static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, > bool enable) > { >@@ -523,23 +510,6 @@ > WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); > } > >-static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, >- bool enable) >-{ >- uint32_t def, data; >- >- def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); >- >- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && >- (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) >- data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; >- else >- data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; >- >- if(def != data) >- WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); >-} >- > int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, > enum amd_clockgating_state state) > { >@@ -551,14 +521,11 @@ > case CHIP_VEGA12: > case CHIP_VEGA20: > case CHIP_RAVEN: >+ case CHIP_RENOIR: > mmhub_v1_0_update_medium_grain_clock_gating(adev, > state == AMD_CG_STATE_GATE ? true : false); >- athub_update_medium_grain_clock_gating(adev, >- state == AMD_CG_STATE_GATE ? true : false); > mmhub_v1_0_update_medium_grain_light_sleep(adev, > state == AMD_CG_STATE_GATE ? true : false); >- athub_update_medium_grain_light_sleep(adev, >- state == AMD_CG_STATE_GATE ? 
true : false); > break; > default: > break; >@@ -569,18 +536,85 @@ > > void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) > { >- int data; >+ int data, data1; > > if (amdgpu_sriov_vf(adev)) > *flags = 0; > >+ data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); >+ >+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); >+ > /* AMD_CG_SUPPORT_MC_MGCG */ >- data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); >- if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) >+ if ((data & ATC_L2_MISC_CG__ENABLE_MASK) && >+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) > *flags |= AMD_CG_SUPPORT_MC_MGCG; > > /* AMD_CG_SUPPORT_MC_LS */ >- data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); > if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) > *flags |= AMD_CG_SUPPORT_MC_LS; > } >+ >+static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, >+ void *ras_error_status) >+{ >+ int i; >+ uint32_t ea0_edc_cnt, ea0_edc_cnt2; >+ uint32_t ea1_edc_cnt, ea1_edc_cnt2; >+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; >+ >+ /* EDC CNT will be cleared automatically after read */ >+ ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20); >+ ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20); >+ ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20); >+ ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20); >+ >+ /* error count of each error type is recorded by 2 bits, >+ * ce and ue count in EDC_CNT >+ */ >+ for (i = 0; i < 5; i++) { >+ err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); >+ err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); >+ ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; >+ ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; >+ err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); >+ err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); >+ ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; >+ ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; >+ } >+ /* successive ue count in EDC_CNT */ >+ for (i = 0; i < 5; i++) { >+ err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); >+ err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); >+ ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; >+ ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; >+ } >+ >+ /* ce and ue count in EDC_CNT2 */ >+ for (i = 0; i < 3; i++) { >+ err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); >+ err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); >+ ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; >+ ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; >+ err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); >+ err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); >+ ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; >+ ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; >+ } >+ /* successive ue count in EDC_CNT2 */ >+ for (i = 0; i < 6; i++) { >+ err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); >+ err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); >+ ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; >+ ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; >+ } >+} >+ >+const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { >+ .query_ras_error_count = mmhub_v1_0_query_ras_error_count, >+}; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h 2019-08-31 15:01:11.849736167 
-0500 >@@ -23,6 +23,8 @@ > #ifndef __MMHUB_V1_0_H__ > #define __MMHUB_V1_0_H__ > >+extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs; >+ > u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev); > int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); > void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 2019-08-31 15:01:11.849736167 -0500 >@@ -126,7 +126,7 @@ > /* XXX for emulation, Refer to closed source code.*/ > tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, > 0); >- tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); >+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); > tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); > tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); > WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); >@@ -324,7 +324,7 @@ > > void mmhub_v2_0_init(struct amdgpu_device *adev) > { >- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; >+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; > > hub->ctx0_ptb_addr_lo32 = > SOC15_REG_OFFSET(MMHUB, 0, >@@ -406,6 +406,8 @@ > > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > mmhub_v2_0_update_medium_grain_clock_gating(adev, > state == AMD_CG_STATE_GATE ? true : false); > mmhub_v2_0_update_medium_grain_light_sleep(adev, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 2019-08-31 15:01:11.849736167 -0500 >@@ -0,0 +1,642 @@ >+/* >+ * Copyright 2018 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. 
>+ *
>+ */
>+#include "amdgpu.h"
>+#include "mmhub_v9_4.h"
>+
>+#include "mmhub/mmhub_9_4_1_offset.h"
>+#include "mmhub/mmhub_9_4_1_sh_mask.h"
>+#include "mmhub/mmhub_9_4_1_default.h"
>+#include "athub/athub_1_0_offset.h"
>+#include "athub/athub_1_0_sh_mask.h"
>+#include "vega10_enum.h"
>+
>+#include "soc15_common.h"
>+
>+#define MMHUB_NUM_INSTANCES 2
>+#define MMHUB_INSTANCE_REGISTER_OFFSET 0x3000
>+
>+u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
>+{
>+ /* The base should be the same between the 2 mmhubs on Arcturus. Read one here. */
>+ u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE);
>+ u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP);
>+
>+ base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
>+ base <<= 24;
>+
>+ top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
>+ top <<= 24;
>+
>+ adev->gmc.fb_start = base;
>+ adev->gmc.fb_end = top;
>+
>+ return base;
>+}
>+
>+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
>+ uint32_t vmid, uint64_t value)
>+{
>+ /* two registers' distance between mmVML2VC0_VM_CONTEXT0_* and
>+ * mmVML2VC0_VM_CONTEXT1_*
>+ */
>+ int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
>+ - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
>+
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
>+ dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ lower_32_bits(value));
>+
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
>+ dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ upper_32_bits(value));
>+
>+}
>+
>+static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
>+ int hubid)
>+{
>+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
>+
>+ mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base);
>+
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ (u32)(adev->gmc.gart_start >> 12));
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ (u32)(adev->gmc.gart_start >> 44));
>+
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ (u32)(adev->gmc.gart_end >> 12));
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ (u32)(adev->gmc.gart_end >> 44));
>+}
>+
>+static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
>+ int hubid)
>+{
>+ uint64_t value;
>+ uint32_t tmp;
>+
>+ /* Program the AGP BAR */
>+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ 0);
>+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ adev->gmc.agp_end >> 24);
>+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ adev->gmc.agp_start >> 24);
>+
>+ /* Program the system aperture low logical page number. */
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
>+
>+ /* Set default page address. 
*/ >+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + >+ adev->vm_manager.vram_base_offset; >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, >+ (u32)(value >> 12)); >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, >+ (u32)(value >> 44)); >+ >+ /* Program "protection fault". */ >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, >+ (u32)(adev->dummy_page_addr >> 12)); >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, >+ (u32)((u64)adev->dummy_page_addr >> 44)); >+ >+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, >+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); >+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); >+} >+ >+static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) >+{ >+ uint32_t tmp; >+ >+ /* Setup TLB control */ >+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET); >+ >+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ ENABLE_L1_TLB, 1); >+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ SYSTEM_ACCESS_MODE, 3); >+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ ENABLE_ADVANCED_DRIVER_MODEL, 1); >+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); >+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ ECO_BITS, 0); >+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ >+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ ATC_EN, 1); >+ >+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); >+} >+ >+static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) >+{ >+ uint32_t tmp; >+ >+ /* Setup L2 cache */ >+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, >+ ENABLE_L2_CACHE, 1); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, >+ ENABLE_L2_FRAGMENT_PROCESSING, 1); >+ /* XXX for emulation, Refer to closed source code.*/ >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, >+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, >+ PDE_FAULT_CLASSIFICATION, 0); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, >+ CONTEXT1_IDENTITY_ACCESS_MODE, 1); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, >+ IDENTITY_MODE_FRAGMENT_SIZE, 0); >+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); >+ >+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, >+ INVALIDATE_ALL_L1_TLBS, 1); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, >+ INVALIDATE_L2_CACHE, 1); >+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); >+ >+ tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; >+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); >+ >+ tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT; >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, >+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0); >+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, >+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0); >+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); >+} >+ >+static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev, >+ int hubid) >+{ >+ uint32_t tmp; >+ >+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET); >+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); >+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); >+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); >+} >+ >+static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, >+ int hubid) >+{ >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF); >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F); >+ >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); >+ >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, >+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); >+} >+ >+static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) >+{ >+ uint32_t tmp; >+ int i; >+ >+ for (i = 0; i <= 14; i++) { 
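>+		/*
>+		 * Contexts 1..15 back the per-process VMIDs; context 0 was
>+		 * programmed for the kernel GART mapping above. The second
>+		 * hub instance sits MMHUB_INSTANCE_REGISTER_OFFSET registers
>+		 * away, hence the hubid term in every offset below.
>+		 */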
>+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ ENABLE_CONTEXT, 1);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ PAGE_TABLE_DEPTH,
>+ adev->vm_manager.num_level);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ 1);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ PAGE_TABLE_BLOCK_SIZE,
>+ adev->vm_manager.block_size - 9);
>+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
>+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
>+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i,
>+ tmp);
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0);
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0);
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2,
>+ lower_32_bits(adev->vm_manager.max_pfn - 1));
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2,
>+ upper_32_bits(adev->vm_manager.max_pfn - 1));
>+ }
>+}
>+
>+static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev,
>+ int hubid)
>+{
>+ unsigned i;
>+
>+ for (i = 0; i < 18; ++i) {
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i,
>+ 0xffffffff);
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
>+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i,
>+ 0x1f);
>+ }
>+}
>+
>+int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
>+{
>+ int i;
>+
>+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
>+ if (amdgpu_sriov_vf(adev)) {
>+ /*
>+ * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because
>+ * they are VF copy registers so the vbios post doesn't
>+ * program them; for SRIOV the driver needs to program them
>+ */
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE,
>+ i * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ adev->gmc.vram_start >> 24);
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP,
>+ i * MMHUB_INSTANCE_REGISTER_OFFSET,
>+ adev->gmc.vram_end >> 24);
>+ }
>+
>+ /* GART Enable. 
*/
>+ mmhub_v9_4_init_gart_aperture_regs(adev, i);
>+ mmhub_v9_4_init_system_aperture_regs(adev, i);
>+ mmhub_v9_4_init_tlb_regs(adev, i);
>+ mmhub_v9_4_init_cache_regs(adev, i);
>+
>+ mmhub_v9_4_enable_system_domain(adev, i);
>+ mmhub_v9_4_disable_identity_aperture(adev, i);
>+ mmhub_v9_4_setup_vmid_config(adev, i);
>+ mmhub_v9_4_program_invalidation(adev, i);
>+ }
>+
>+ return 0;
>+}
>+
>+void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
>+{
>+ u32 tmp;
>+ u32 i, j;
>+
>+ for (j = 0; j < MMHUB_NUM_INSTANCES; j++) {
>+ /* Disable all tables */
>+ for (i = 0; i < 16; i++)
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2VC0_VM_CONTEXT0_CNTL,
>+ j * MMHUB_INSTANCE_REGISTER_OFFSET +
>+ i, 0);
>+
>+ /* Setup TLB control */
>+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
>+ j * MMHUB_INSTANCE_REGISTER_OFFSET);
>+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
>+ ENABLE_L1_TLB, 0);
>+ tmp = REG_SET_FIELD(tmp,
>+ VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
>+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
>+ WREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
>+ j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
>+
>+ /* Setup L2 cache */
>+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
>+ j * MMHUB_INSTANCE_REGISTER_OFFSET);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
>+ ENABLE_L2_CACHE, 0);
>+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
>+ j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
>+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
>+ j * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
>+ }
>+}
>+
>+/**
>+ * mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling
>+ *
>+ * @adev: amdgpu_device pointer
>+ * @value: true redirects VM faults to the default page
>+ */
>+void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value)
>+{
>+ u32 tmp;
>+ int i;
>+
>+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
>+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
>+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ i * MMHUB_INSTANCE_REGISTER_OFFSET);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp,
>+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ READ_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT,
>+ value);
>+ if (!value) {
>+ tmp = REG_SET_FIELD(tmp,
>+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ CRASH_ON_NO_RETRY_FAULT, 1);
>+ tmp = REG_SET_FIELD(tmp,
>+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
>+ CRASH_ON_RETRY_FAULT, 1); 
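>+			/* With default-page redirection off, both retry and
>+			 * no-retry faults are made fatal so they surface
>+			 * immediately instead of being silently dropped.
>+			 */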
>+ } >+ >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, >+ i * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); >+ } >+} >+ >+void mmhub_v9_4_init(struct amdgpu_device *adev) >+{ >+ struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = >+ {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; >+ int i; >+ >+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { >+ hub[i]->ctx0_ptb_addr_lo32 = >+ SOC15_REG_OFFSET(MMHUB, 0, >+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + >+ i * MMHUB_INSTANCE_REGISTER_OFFSET; >+ hub[i]->ctx0_ptb_addr_hi32 = >+ SOC15_REG_OFFSET(MMHUB, 0, >+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + >+ i * MMHUB_INSTANCE_REGISTER_OFFSET; >+ hub[i]->vm_inv_eng0_req = >+ SOC15_REG_OFFSET(MMHUB, 0, >+ mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + >+ i * MMHUB_INSTANCE_REGISTER_OFFSET; >+ hub[i]->vm_inv_eng0_ack = >+ SOC15_REG_OFFSET(MMHUB, 0, >+ mmVML2VC0_VM_INVALIDATE_ENG0_ACK) + >+ i * MMHUB_INSTANCE_REGISTER_OFFSET; >+ hub[i]->vm_context0_cntl = >+ SOC15_REG_OFFSET(MMHUB, 0, >+ mmVML2VC0_VM_CONTEXT0_CNTL) + >+ i * MMHUB_INSTANCE_REGISTER_OFFSET; >+ hub[i]->vm_l2_pro_fault_status = >+ SOC15_REG_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) + >+ i * MMHUB_INSTANCE_REGISTER_OFFSET; >+ hub[i]->vm_l2_pro_fault_cntl = >+ SOC15_REG_OFFSET(MMHUB, 0, >+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + >+ i * MMHUB_INSTANCE_REGISTER_OFFSET; >+ } >+} >+ >+static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device *adev, >+ bool enable) >+{ >+ uint32_t def, data, def1, data1; >+ int i, j; >+ int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2; >+ >+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { >+ def = data = RREG32_SOC15_OFFSET(MMHUB, 0, >+ mmATCL2_0_ATC_L2_MISC_CG, >+ i * MMHUB_INSTANCE_REGISTER_OFFSET); >+ >+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) >+ data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; >+ else >+ data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; >+ >+ if (def != data) >+ WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, >+ i * MMHUB_INSTANCE_REGISTER_OFFSET, data); >+ >+ for (j = 0; j < 5; j++) { >+ def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0, >+ mmDAGB0_CNTL_MISC2, >+ i * MMHUB_INSTANCE_REGISTER_OFFSET + >+ j * dist); >+ if (enable && >+ (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { >+ data1 &= >+ ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); >+ } else { >+ data1 |= >+ (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | >+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); >+ } >+ >+ if (def1 != data1) >+ WREG32_SOC15_OFFSET(MMHUB, 0, >+ mmDAGB0_CNTL_MISC2, >+ i * MMHUB_INSTANCE_REGISTER_OFFSET + >+ j * dist, data1); >+ >+ if (i == 1 && j == 3) >+ break; >+ } >+ } >+} >+ >+static void mmhub_v9_4_update_medium_grain_light_sleep(struct amdgpu_device *adev, >+ bool enable) >+{ >+ uint32_t def, data; >+ int i; >+ >+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { >+ def = data = RREG32_SOC15_OFFSET(MMHUB, 0, >+ mmATCL2_0_ATC_L2_MISC_CG, >+ i * MMHUB_INSTANCE_REGISTER_OFFSET); >+ >+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) >+ data |= ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; >+ else >+ data &= ~ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; 
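>+		/* Write the register back only if the light-sleep bit changed. */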
>+
>+ if (def != data)
>+ WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG,
>+ i * MMHUB_INSTANCE_REGISTER_OFFSET, data);
>+ }
>+}
>+
>+int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
>+ enum amd_clockgating_state state)
>+{
>+ if (amdgpu_sriov_vf(adev))
>+ return 0;
>+
>+ switch (adev->asic_type) {
>+ case CHIP_ARCTURUS:
>+ mmhub_v9_4_update_medium_grain_clock_gating(adev,
>+ state == AMD_CG_STATE_GATE ? true : false);
>+ mmhub_v9_4_update_medium_grain_light_sleep(adev,
>+ state == AMD_CG_STATE_GATE ? true : false);
>+ break;
>+ default:
>+ break;
>+ }
>+
>+ return 0;
>+}
>+
>+void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
>+{
>+ int data, data1;
>+
>+ if (amdgpu_sriov_vf(adev))
>+ *flags = 0;
>+
>+ /* AMD_CG_SUPPORT_MC_MGCG */
>+ data = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG);
>+
>+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
>+
>+ if ((data & ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK) &&
>+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
>+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
>+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
>+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
>+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
>+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
>+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
>+
>+ /* AMD_CG_SUPPORT_MC_LS */
>+ if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
>+ *flags |= AMD_CG_SUPPORT_MC_LS;
>+}
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h 1969-12-31 18:00:00.000000000 -0600
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h 2019-08-31 15:01:11.849736167 -0500
>@@ -0,0 +1,36 @@
>+/*
>+ * Copyright 2018 Advanced Micro Devices, Inc.
>+ *
>+ * Permission is hereby granted, free of charge, to any person obtaining a
>+ * copy of this software and associated documentation files (the "Software"),
>+ * to deal in the Software without restriction, including without limitation
>+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>+ * and/or sell copies of the Software, and to permit persons to whom the
>+ * Software is furnished to do so, subject to the following conditions:
>+ *
>+ * The above copyright notice and this permission notice shall be included in
>+ * all copies or substantial portions of the Software.
>+ *
>+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>+ * OTHER DEALINGS IN THE SOFTWARE. 
>+ * >+ */ >+#ifndef __MMHUB_V9_4_H__ >+#define __MMHUB_V9_4_H__ >+ >+u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev); >+int mmhub_v9_4_gart_enable(struct amdgpu_device *adev); >+void mmhub_v9_4_gart_disable(struct amdgpu_device *adev); >+void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, >+ bool value); >+void mmhub_v9_4_init(struct amdgpu_device *adev); >+int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, >+ enum amd_clockgating_state state); >+void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags); >+ >+#endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 2019-08-31 15:01:11.849736167 -0500 >@@ -449,20 +449,6 @@ > amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); > } > >-static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) >-{ >- adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; >- >- /* Enable L1 security reg access mode by defaul, as non-security VF >- * will no longer be supported. >- */ >- adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; >- >- adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; >- >- adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; >-} >- > const struct amdgpu_virt_ops xgpu_ai_virt_ops = { > .req_full_gpu = xgpu_ai_request_full_gpu_access, > .rel_full_gpu = xgpu_ai_release_full_gpu_access, >@@ -471,5 +457,4 @@ > .trans_msg = xgpu_ai_mailbox_trans_msg, > .get_pp_clk = xgpu_ai_get_pp_clk, > .force_dpm_level = xgpu_ai_force_dpm_level, >- .init_reg_access_mode = xgpu_ai_init_reg_access_mode, > }; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi10_ih.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi10_ih.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 2019-08-31 15:01:11.849736167 -0500 >@@ -21,7 +21,8 @@ > * > */ > >-#include <drm/drmP.h> >+#include <linux/pci.h> >+ > #include "amdgpu.h" > #include "amdgpu_ih.h" > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c 2019-08-31 15:01:11.849736167 -0500 >@@ -29,20 +29,8 @@ > > int navi10_reg_base_init(struct amdgpu_device *adev) > { >- int r, i; >+ int i; > >- if (amdgpu_discovery) { >- r = amdgpu_discovery_reg_base_init(adev); >- if (r) { >- DRM_WARN("failed to init reg base from ip discovery table, " >- "fallback to legacy init method\n"); >- goto legacy_init; >- } >- >- return 0; >- } >- >-legacy_init: > for (i = 0 ; i < MAX_INSTANCE ; ++i) { > adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); > adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c 2019-08-31 15:01:11.849736167 -0500 >@@ -0,0 +1,53 @@ >+/* >+ * Copyright 2018 Advanced Micro Devices, Inc. 
>+ *
>+ * Permission is hereby granted, free of charge, to any person obtaining a
>+ * copy of this software and associated documentation files (the "Software"),
>+ * to deal in the Software without restriction, including without limitation
>+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>+ * and/or sell copies of the Software, and to permit persons to whom the
>+ * Software is furnished to do so, subject to the following conditions:
>+ *
>+ * The above copyright notice and this permission notice shall be included in
>+ * all copies or substantial portions of the Software.
>+ *
>+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>+ * OTHER DEALINGS IN THE SOFTWARE.
>+ *
>+ */
>+#include "amdgpu.h"
>+#include "nv.h"
>+
>+#include "soc15_common.h"
>+#include "soc15_hw_ip.h"
>+#include "navi12_ip_offset.h"
>+
>+int navi12_reg_base_init(struct amdgpu_device *adev)
>+{
>+ /* HW has more IP blocks; only initialize the blocks needed by the driver */
>+ uint32_t i;
>+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
>+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
>+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
>+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
>+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
>+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
>+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
>+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
>+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
>+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
>+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
>+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
>+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
>+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
>+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
>+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
>+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
>+ }
>+ return 0;
>+}
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c 1969-12-31 18:00:00.000000000 -0600
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c 2019-08-31 15:01:11.849736167 -0500
>@@ -0,0 +1,54 @@
>+/*
>+ * Copyright 2018 Advanced Micro Devices, Inc. 
>+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. >+ * >+ */ >+#include "amdgpu.h" >+#include "nv.h" >+ >+#include "soc15_common.h" >+#include "soc15_hw_ip.h" >+#include "navi14_ip_offset.h" >+ >+int navi14_reg_base_init(struct amdgpu_device *adev) >+{ >+ int i; >+ >+ for (i = 0 ; i < MAX_INSTANCE ; ++i) { >+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); >+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); >+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); >+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); >+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); >+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); >+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); >+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); >+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); >+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); >+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); >+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); >+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); >+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); >+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); >+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); >+ } >+ >+ return 0; >+} >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 2019-08-31 15:01:11.849736167 -0500 >@@ -92,7 +92,7 @@ > } > > static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, >- int doorbell_index) >+ int doorbell_index, int instance) > { > u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 2019-08-31 15:01:11.849736167 -0500 >@@ -91,6 +91,26 @@ > WREG32(reg, doorbell_range); > } > 
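>+/*
>+ * Route the VCN doorbells through the BIF MMSCH0 aperture: open a
>+ * window (SIZE = 8) at doorbell_index when use_doorbell is set, or
>+ * disable it (SIZE = 0) otherwise. This NBIO has a single MMSCH, so
>+ * the instance argument is unused here.
>+ */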
>+static void nbio_v7_0_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
>+ int doorbell_index, int instance)
>+{
>+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
>+
>+ u32 doorbell_range = RREG32(reg);
>+
>+ if (use_doorbell) {
>+ doorbell_range = REG_SET_FIELD(doorbell_range,
>+ BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
>+ doorbell_index);
>+ doorbell_range = REG_SET_FIELD(doorbell_range,
>+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
>+ } else
>+ doorbell_range = REG_SET_FIELD(doorbell_range,
>+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);
>+
>+ WREG32(reg, doorbell_range);
>+}
>+
> static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
> bool enable)
> {
>@@ -282,6 +302,7 @@
> .hdp_flush = nbio_v7_0_hdp_flush,
> .get_memsize = nbio_v7_0_get_memsize,
> .sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range,
>+ .vcn_doorbell_range = nbio_v7_0_vcn_doorbell_range,
> .enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture,
> .enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture,
> .ih_doorbell_range = nbio_v7_0_ih_doorbell_range,
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 2019-08-31 15:01:11.849736167 -0500
>@@ -31,6 +31,25 @@
> 
> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
> 
>+/*
>+ * These are nbio v7_4_1 register masks. Temporarily define these here since
>+ * the nbio v7_4_1 header is incomplete.
>+ */
>+#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L
>+#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L
>+#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L
>+#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L
>+#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L
>+#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L
>+
>+#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01dc
>+#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
>+//BIF_MMSCH1_DOORBELL_RANGE
>+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT 0x2
>+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT 0x10
>+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL
>+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L
>+
> static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
> {
> WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
>@@ -75,10 +94,24 @@
> static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
> bool use_doorbell, int doorbell_index, int doorbell_size)
> {
>- u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
>- SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
>+ u32 reg, doorbell_range;
>+
>+ if (instance < 2)
>+ reg = instance +
>+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
>+ else
>+ /*
>+ * The register addresses of SDMA2~7 are not consecutive
>+ * with SDMA0~1; they need an extra 4-dword offset. 
>+ * >+ * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0 >+ * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4 >+ * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8 >+ */ >+ reg = instance + 0x4 + >+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); > >- u32 doorbell_range = RREG32(reg); >+ doorbell_range = RREG32(reg); > > if (use_doorbell) { > doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); >@@ -89,6 +122,32 @@ > WREG32(reg, doorbell_range); > } > >+static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, >+ int doorbell_index, int instance) >+{ >+ u32 reg; >+ u32 doorbell_range; >+ >+ if (instance) >+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE); >+ else >+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); >+ >+ doorbell_range = RREG32(reg); >+ >+ if (use_doorbell) { >+ doorbell_range = REG_SET_FIELD(doorbell_range, >+ BIF_MMSCH0_DOORBELL_RANGE, OFFSET, >+ doorbell_index); >+ doorbell_range = REG_SET_FIELD(doorbell_range, >+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); >+ } else >+ doorbell_range = REG_SET_FIELD(doorbell_range, >+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); >+ >+ WREG32(reg, doorbell_range); >+} >+ > static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, > bool enable) > { >@@ -220,6 +279,12 @@ > .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, > .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK, > .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, >+ .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK, >+ .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, >+ .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, >+ .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, >+ .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, >+ .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, > }; > > static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) >@@ -261,6 +326,7 @@ > .hdp_flush = nbio_v7_4_hdp_flush, > .get_memsize = nbio_v7_4_get_memsize, > .sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range, >+ .vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range, > .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, > .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, > .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nv.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nv.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nv.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nv.c 2019-08-31 15:01:12.265736204 -0500 >@@ -23,7 +23,8 @@ > #include <linux/firmware.h> > #include <linux/slab.h> > #include <linux/module.h> >-#include <drm/drmP.h> >+#include <linux/pci.h> >+ > #include "amdgpu.h" > #include "amdgpu_atombios.h" > #include "amdgpu_ih.h" >@@ -289,6 +290,18 @@ > > return ret; > } >+ >+static enum amd_reset_method >+nv_asic_reset_method(struct amdgpu_device *adev) >+{ >+ struct smu_context *smu = &adev->smu; >+ >+ if (smu_baco_is_support(smu)) >+ return AMD_RESET_METHOD_BACO; >+ else >+ return AMD_RESET_METHOD_MODE1; >+} >+ > static int nv_asic_reset(struct amdgpu_device *adev) > { > >@@ -303,10 +316,13 @@ > int ret = 0; > struct smu_context *smu = &adev->smu; > >- if (smu_baco_is_support(smu)) >+ if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { >+ amdgpu_inc_vram_lost(adev); > ret = smu_baco_reset(smu); >- else >+ } else { >+ amdgpu_inc_vram_lost(adev); > ret = nv_asic_mode1_reset(adev); >+ } > > return ret; > } >@@ 
-363,23 +379,55 @@ > .funcs = &nv_common_ip_funcs, > }; > >-int nv_set_ip_blocks(struct amdgpu_device *adev) >+static int nv_reg_base_init(struct amdgpu_device *adev) > { >- /* Set IP register base before any HW register access */ >+ int r; >+ >+ if (amdgpu_discovery) { >+ r = amdgpu_discovery_reg_base_init(adev); >+ if (r) { >+ DRM_WARN("failed to init reg base from ip discovery table, " >+ "fallback to legacy init method\n"); >+ goto legacy_init; >+ } >+ >+ return 0; >+ } >+ >+legacy_init: > switch (adev->asic_type) { > case CHIP_NAVI10: > navi10_reg_base_init(adev); > break; >+ case CHIP_NAVI14: >+ navi14_reg_base_init(adev); >+ break; >+ case CHIP_NAVI12: >+ navi12_reg_base_init(adev); >+ break; > default: > return -EINVAL; > } > >+ return 0; >+} >+ >+int nv_set_ip_blocks(struct amdgpu_device *adev) >+{ >+ int r; >+ >+ /* Set IP register base before any HW register access */ >+ r = nv_reg_base_init(adev); >+ if (r) >+ return r; >+ > adev->nbio_funcs = &nbio_v2_3_funcs; > > adev->nbio_funcs->detect_hw_virt(adev); > > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: > amdgpu_device_ip_block_add(adev, &nv_common_ip_block); > amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); > amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); >@@ -402,6 +450,27 @@ > if (adev->enable_mes) > amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); > break; >+ case CHIP_NAVI12: >+ amdgpu_device_ip_block_add(adev, &nv_common_ip_block); >+ amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); >+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); >+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && >+ is_support_sw_smu(adev)) >+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); >+ if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) >+ amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); >+#if defined(CONFIG_DRM_AMD_DC) >+ else if (amdgpu_device_has_dc_support(adev)) >+ amdgpu_device_ip_block_add(adev, &dm_ip_block); >+#endif >+ amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); >+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && >+ is_support_sw_smu(adev)) >+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); >+ break; > default: > return -EINVAL; > } >@@ -496,6 +565,7 @@ > .read_bios_from_rom = &nv_read_bios_from_rom, > .read_register = &nv_read_register, > .reset = &nv_asic_reset, >+ .reset_method = &nv_asic_reset_method, > .set_vga_state = &nv_vga_set_state, > .get_xclk = &nv_get_xclk, > .set_uvd_clocks = &nv_set_uvd_clocks, >@@ -511,7 +581,6 @@ > > static int nv_common_early_init(void *handle) > { >- bool psp_enabled = false; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > adev->smc_rreg = NULL; >@@ -528,10 +597,6 @@ > > adev->asic_funcs = &nv_asic_funcs; > >- if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && >- (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) >- psp_enabled = true; >- > adev->rev_id = nv_get_rev_id(adev); > adev->external_rev_id = 0xff; > switch (adev->asic_type) { >@@ -555,6 +620,46 @@ > AMD_PG_SUPPORT_ATHUB; > adev->external_rev_id = adev->rev_id + 0x1; > break; >+ case CHIP_NAVI14: >+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | >+ AMD_CG_SUPPORT_GFX_CGCG | >+ AMD_CG_SUPPORT_IH_CG | >+ AMD_CG_SUPPORT_HDP_MGCG | >+ AMD_CG_SUPPORT_HDP_LS | >+ AMD_CG_SUPPORT_SDMA_MGCG | >+ AMD_CG_SUPPORT_SDMA_LS | >+ 
AMD_CG_SUPPORT_MC_MGCG | >+ AMD_CG_SUPPORT_MC_LS | >+ AMD_CG_SUPPORT_ATHUB_MGCG | >+ AMD_CG_SUPPORT_ATHUB_LS | >+ AMD_CG_SUPPORT_VCN_MGCG | >+ AMD_CG_SUPPORT_BIF_MGCG | >+ AMD_CG_SUPPORT_BIF_LS; >+ adev->pg_flags = AMD_PG_SUPPORT_VCN | >+ AMD_PG_SUPPORT_VCN_DPG; >+ adev->external_rev_id = adev->rev_id + 20; >+ break; >+ case CHIP_NAVI12: >+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | >+ AMD_CG_SUPPORT_GFX_MGLS | >+ AMD_CG_SUPPORT_GFX_CGCG | >+ AMD_CG_SUPPORT_GFX_CP_LS | >+ AMD_CG_SUPPORT_GFX_RLC_LS | >+ AMD_CG_SUPPORT_IH_CG | >+ AMD_CG_SUPPORT_HDP_MGCG | >+ AMD_CG_SUPPORT_HDP_LS | >+ AMD_CG_SUPPORT_SDMA_MGCG | >+ AMD_CG_SUPPORT_SDMA_LS | >+ AMD_CG_SUPPORT_MC_MGCG | >+ AMD_CG_SUPPORT_MC_LS | >+ AMD_CG_SUPPORT_ATHUB_MGCG | >+ AMD_CG_SUPPORT_ATHUB_LS | >+ AMD_CG_SUPPORT_VCN_MGCG; >+ adev->pg_flags = AMD_PG_SUPPORT_VCN | >+ AMD_PG_SUPPORT_VCN_DPG | >+ AMD_PG_SUPPORT_ATHUB; >+ adev->external_rev_id = adev->rev_id + 0xa; >+ break; > default: > /* FIXME: not supported yet */ > return -EINVAL; >@@ -747,6 +852,8 @@ > > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > adev->nbio_funcs->update_medium_grain_clock_gating(adev, > state == AMD_CG_STATE_GATE ? true : false); > adev->nbio_funcs->update_medium_grain_light_sleep(adev, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nv.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nv.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nv.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nv.h 2019-08-31 15:01:11.849736167 -0500 >@@ -30,4 +30,6 @@ > u32 me, u32 pipe, u32 queue, u32 vmid); > int nv_set_ip_blocks(struct amdgpu_device *adev); > int navi10_reg_base_init(struct amdgpu_device *adev); >+int navi14_reg_base_init(struct amdgpu_device *adev); >+int navi12_reg_base_init(struct amdgpu_device *adev); > #endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 2019-08-31 15:01:11.849736167 -0500 >@@ -233,8 +233,15 @@ > GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */ > GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */ > GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */ >- GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV */ >- GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV */ >+ GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV + RN */ >+ GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV + RN */ >+ GFX_FW_TYPE_DMUB = 51, /* DMUB RN */ >+ GFX_FW_TYPE_SDMA2 = 52, /* SDMA2 MI */ >+ GFX_FW_TYPE_SDMA3 = 53, /* SDMA3 MI */ >+ GFX_FW_TYPE_SDMA4 = 54, /* SDMA4 MI */ >+ GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */ >+ GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */ >+ GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ > GFX_FW_TYPE_MAX > }; > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c 2019-08-31 15:01:11.850736167 -0500 >@@ -190,7 +190,6 @@ > } > > static int psp_v10_0_cmd_submit(struct psp_context *psp, >- struct amdgpu_firmware_info *ucode, > uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, > int index) > { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c >--- 
linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 2019-08-31 15:01:11.850736167 -0500 >@@ -43,6 +43,12 @@ > MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); > MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); > MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); >+MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); >+MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); >+MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); >+MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); >+MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); >+MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); > > /* address block */ > #define smnMP1_FIRMWARE_FLAGS 0x3010024 >@@ -60,6 +66,7 @@ > int err = 0; > const struct psp_firmware_header_v1_0 *sos_hdr; > const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; >+ const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; > const struct psp_firmware_header_v1_0 *asd_hdr; > const struct ta_firmware_header_v1_0 *ta_hdr; > >@@ -72,6 +79,15 @@ > case CHIP_NAVI10: > chip_name = "navi10"; > break; >+ case CHIP_NAVI14: >+ chip_name = "navi14"; >+ break; >+ case CHIP_NAVI12: >+ chip_name = "navi12"; >+ break; >+ case CHIP_ARCTURUS: >+ chip_name = "arcturus"; >+ break; > default: > BUG(); > } >@@ -107,6 +123,12 @@ > adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + > le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes); > } >+ if (sos_hdr->header.header_version_minor == 2) { >+ sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; >+ adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes); >+ adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + >+ le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes); >+ } > break; > default: > dev_err(adev->dev, >@@ -158,6 +180,9 @@ > } > break; > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: >+ case CHIP_ARCTURUS: > break; > default: > BUG(); >@@ -473,7 +498,6 @@ > } > > static int psp_v11_0_cmd_submit(struct psp_context *psp, >- struct amdgpu_firmware_info *ucode, > uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, > int index) > { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c 2019-08-31 15:01:11.850736167 -0500 >@@ -0,0 +1,565 @@ >+/* >+ * Copyright 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
>+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>+ * OTHER DEALINGS IN THE SOFTWARE.
>+ */
>+
>+#include <linux/firmware.h>
>+#include <linux/module.h>
>+#include "amdgpu.h"
>+#include "amdgpu_psp.h"
>+#include "amdgpu_ucode.h"
>+#include "soc15_common.h"
>+#include "psp_v12_0.h"
>+
>+#include "mp/mp_12_0_0_offset.h"
>+#include "mp/mp_12_0_0_sh_mask.h"
>+#include "gc/gc_9_0_offset.h"
>+#include "sdma0/sdma0_4_0_offset.h"
>+#include "nbio/nbio_7_4_offset.h"
>+
>+#include "oss/osssys_4_0_offset.h"
>+#include "oss/osssys_4_0_sh_mask.h"
>+
>+MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
>+/* address block */
>+#define smnMP1_FIRMWARE_FLAGS 0x3010024
>+
>+static int psp_v12_0_init_microcode(struct psp_context *psp)
>+{
>+	struct amdgpu_device *adev = psp->adev;
>+	const char *chip_name;
>+	char fw_name[30];
>+	int err = 0;
>+	const struct psp_firmware_header_v1_0 *asd_hdr;
>+
>+	DRM_DEBUG("\n");
>+
>+	switch (adev->asic_type) {
>+	case CHIP_RENOIR:
>+		chip_name = "renoir";
>+		break;
>+	default:
>+		BUG();
>+	}
>+
>+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
>+	err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
>+	if (err)
>+		goto out1;
>+
>+	err = amdgpu_ucode_validate(adev->psp.asd_fw);
>+	if (err)
>+		goto out1;
>+
>+	asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
>+	adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
>+	adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version);
>+	adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
>+	adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
>+		le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
>+
>+	return 0;
>+
>+out1:
>+	release_firmware(adev->psp.asd_fw);
>+	adev->psp.asd_fw = NULL;
>+
>+	return err;
>+}
>+
>+static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
>+{
>+	int ret;
>+	uint32_t psp_gfxdrv_command_reg = 0;
>+	struct amdgpu_device *adev = psp->adev;
>+	uint32_t sol_reg;
>+
>+	/* Check sOS sign of life register to confirm sys driver and sOS
>+	 * have already been loaded.
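>+	 * If it is set, the bootloader has already run, so only the sOS
>+	 * firmware version needs to be cached below.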
>+	 */
>+	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
>+	if (sol_reg) {
>+		psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
>+		printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
>+		return 0;
>+	}
>+
>+	/* Wait for the bootloader to signal that it is ready, i.e. bit 31 of C2PMSG_35 set to 1 */
>+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
>+			   0x80000000, 0x80000000, false);
>+	if (ret)
>+		return ret;
>+
>+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
>+
>+	/* Copy PSP System Driver binary to memory */
>+	memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
>+
>+	/* Provide the sys driver to bootloader */
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
>+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
>+	psp_gfxdrv_command_reg = 1 << 16;
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
>+		     psp_gfxdrv_command_reg);
>+
>+	/* there might be a handshake issue with hardware which needs a delay */
>+	mdelay(20);
>+
>+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
>+			   0x80000000, 0x80000000, false);
>+
>+	return ret;
>+}
>+
>+static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
>+{
>+	int ret;
>+	unsigned int psp_gfxdrv_command_reg = 0;
>+	struct amdgpu_device *adev = psp->adev;
>+	uint32_t sol_reg;
>+
>+	/* Check sOS sign of life register to confirm sys driver and sOS
>+	 * have already been loaded.
>+	 */
>+	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
>+	if (sol_reg)
>+		return 0;
>+
>+	/* Wait for the bootloader to signal that it is ready, i.e. bit 31 of C2PMSG_35 set to 1 */
>+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
>+			   0x80000000, 0x80000000, false);
>+	if (ret)
>+		return ret;
>+
>+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
>+
>+	/* Copy Secure OS binary to PSP memory */
>+	memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
>+
>+	/* Provide the PSP secure OS to bootloader */
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
>+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
>+	psp_gfxdrv_command_reg = 2 << 16;
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
>+		     psp_gfxdrv_command_reg);
>+
>+	/* there might be a handshake issue with hardware which needs a delay */
>+	mdelay(20);
>+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
>+			   RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
>+			   0, true);
>+
>+	return ret;
>+}
>+
>+static void psp_v12_0_reroute_ih(struct psp_context *psp)
>+{
>+	struct amdgpu_device *adev = psp->adev;
>+	uint32_t tmp;
>+
>+	/* Change IH ring for VMC */
>+	tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b);
>+	tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
>+	tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
>+
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3);
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
>+
>+	mdelay(20);
>+	psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
>+		     0x80000000, 0x8000FFFF, false);
>+
>+	/* Change IH ring for UMC */
>+	tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
>+	tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
>+
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4);
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
>+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
>+
>+	mdelay(20);
>+	psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
>+		     0x80000000, 0x8000FFFF, false);
>+}
>+
>+static int psp_v12_0_ring_init(struct psp_context *psp,
>+			       enum psp_ring_type ring_type)
>+{
>+	int ret = 0;
>+	struct psp_ring *ring;
>+	struct amdgpu_device *adev = psp->adev;
>+
>+	psp_v12_0_reroute_ih(psp);
>+
>+	ring = &psp->km_ring;
>+
>+	ring->ring_type = ring_type;
>+
>+	/* allocate a 4K page of local frame buffer memory for the ring */
>+	ring->ring_size = 0x1000;
>+	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
>+				      AMDGPU_GEM_DOMAIN_VRAM,
>+				      &adev->firmware.rbuf,
>+				      &ring->ring_mem_mc_addr,
>+				      (void **)&ring->ring_mem);
>+	if (ret) {
>+		ring->ring_size = 0;
>+		return ret;
>+	}
>+
>+	return 0;
>+}
>+
>+static bool psp_v12_0_support_vmr_ring(struct psp_context *psp)
>+{
>+	if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045)
>+		return true;
>+	return false;
>+}
>+
>+static int psp_v12_0_ring_create(struct psp_context *psp,
>+				 enum psp_ring_type ring_type)
>+{
>+	int ret = 0;
>+	unsigned int psp_ring_reg = 0;
>+	struct psp_ring *ring = &psp->km_ring;
>+	struct amdgpu_device *adev = psp->adev;
>+
>+	if (psp_v12_0_support_vmr_ring(psp)) {
>+		/* Write low address of the ring to C2PMSG_102 */
>+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
>+		/* Write high address of the ring to C2PMSG_103 */
>+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
>+
>+		/* Write the ring initialization command to C2PMSG_101 */
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
>+			     GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
>+
>+		/* there might be a handshake issue with hardware which needs a delay */
>+		mdelay(20);
>+
>+		/* Wait for response flag (bit 31) in C2PMSG_101 */
>+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
>+				   0x80000000, 0x8000FFFF, false);
>+
>+	} else {
>+		/* Write low address of the ring to C2PMSG_69 */
>+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
>+		/* Write high address of the ring to C2PMSG_70 */
>+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
>+		/* Write size of ring to C2PMSG_71 */
>+		psp_ring_reg = ring->ring_size;
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
>+		/* Write the ring initialization command to C2PMSG_64 */
>+		psp_ring_reg = ring_type;
>+		psp_ring_reg = psp_ring_reg << 16;
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
>+
>+		/* there might be a handshake issue with hardware which needs a delay */
>+		mdelay(20);
>+
>+		/* Wait for response flag (bit 31) in C2PMSG_64 */
>+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
>+				   0x80000000, 0x8000FFFF, false);
>+	}
>+
>+	return ret;
>+}
>+
>+static int psp_v12_0_ring_stop(struct psp_context *psp,
>+			       enum psp_ring_type ring_type)
>+{
>+	int ret = 0;
>+	struct amdgpu_device *adev = psp->adev;
>+
>+	/* Write the ring destroy command */
>+	if (psp_v12_0_support_vmr_ring(psp))
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
>+			     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
>+	else
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
>+			     GFX_CTRL_CMD_ID_DESTROY_RINGS);
>+
>+	/* there might be a handshake issue with hardware which needs a delay */
>+	mdelay(20);
>+
>+	/* Wait for response flag (bit 31) */
>+	if (psp_v12_0_support_vmr_ring(psp))
>+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
>+				   0x80000000, 0x80000000, false);
>+	else
>+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
>+				   0x80000000, 0x80000000, false);
>+
>+	return ret;
>+}
>+
>+static int psp_v12_0_ring_destroy(struct psp_context *psp,
>+				  enum psp_ring_type ring_type)
>+{
>+	int ret = 0;
>+	struct psp_ring *ring = &psp->km_ring;
>+	struct amdgpu_device *adev = psp->adev;
>+
>+	ret = psp_v12_0_ring_stop(psp, ring_type);
>+	if (ret)
>+		DRM_ERROR("Failed to stop psp ring\n");
>+
>+	amdgpu_bo_free_kernel(&adev->firmware.rbuf,
>+			      &ring->ring_mem_mc_addr,
>+			      (void **)&ring->ring_mem);
>+
>+	return ret;
>+}
>+
>+static int psp_v12_0_cmd_submit(struct psp_context *psp,
>+				uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
>+				int index)
>+{
>+	unsigned int psp_write_ptr_reg = 0;
>+	struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem;
>+	struct psp_ring *ring = &psp->km_ring;
>+	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
>+	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
>+		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
>+	struct amdgpu_device *adev = psp->adev;
>+	uint32_t ring_size_dw = ring->ring_size / 4;
>+	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
>+
>+	/* KM (GPCOM) prepare write pointer */
>+	if (psp_v12_0_support_vmr_ring(psp))
>+		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
>+	else
>+		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
>+
>+	/* Update KM RB frame pointer to new frame */
>+	/* write_frame ptr increments by size of rb_frame in bytes */
>+	/* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
>+	if ((psp_write_ptr_reg % ring_size_dw) == 0)
>+		write_frame = ring_buffer_start;
>+	else
>+		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
>+	/* Check invalid write_frame ptr address */
>+	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
>+		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
>+			  ring_buffer_start, ring_buffer_end, write_frame);
>+		DRM_ERROR("write_frame is pointing to address out of bounds\n");
>+		return -EINVAL;
>+	}
>+
>+	/* Initialize KM RB frame */
>+	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
>+
>+	/* Update KM RB frame */
>+	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
>+	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
>+	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
>+	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
>+	write_frame->fence_value = index;
>+
>+	/* Update the write pointer in DWORDs */
>+	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
>+	if (psp_v12_0_support_vmr_ring(psp)) {
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
>+	} else
>+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
>+
>+	return 0;
>+}
>+
>+static int
>+psp_v12_0_sram_map(struct amdgpu_device *adev,
>+		   unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
>+		   unsigned int *sram_data_reg_offset,
>+		   enum AMDGPU_UCODE_ID ucode_id)
>+{
>+	int ret = 0;
>+
>+	switch (ucode_id) {
>+/* TODO: needs to confirm */
>+#if 0
>+	case AMDGPU_UCODE_ID_SMC:
>+		*sram_offset = 0;
>+		*sram_addr_reg_offset = 0;
>+		*sram_data_reg_offset = 0;
>+		break;
>+#endif
>+
>+	case AMDGPU_UCODE_ID_CP_CE:
>+		*sram_offset = 0x0;
>+		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
>+		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
>+		break;
>+
>+	case AMDGPU_UCODE_ID_CP_PFP:
>+		*sram_offset = 0x0;
>+		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
>+		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
>+		break;
>+
>+	case AMDGPU_UCODE_ID_CP_ME:
>+		*sram_offset = 0x0;
>+		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
>+		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
>+		break;
>+
>+	case AMDGPU_UCODE_ID_CP_MEC1:
>+		*sram_offset = 0x10000;
>+		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
>+		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
>+		break;
>+
>+	case AMDGPU_UCODE_ID_CP_MEC2:
>+		*sram_offset = 0x10000;
>+		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
>+		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
>+		break;
>+
>+	case AMDGPU_UCODE_ID_RLC_G:
>+		*sram_offset = 0x2000;
>+		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
>+		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
>+		break;
>+
>+	case AMDGPU_UCODE_ID_SDMA0:
>+		*sram_offset = 0x0;
>+		*sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
>+		*sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
>+		break;
>+
>+/* TODO: needs to confirm */
>+#if 0
>+	case AMDGPU_UCODE_ID_SDMA1:
>+		*sram_offset = ;
>+		*sram_addr_reg_offset = ;
>+		break;
>+
>+	case AMDGPU_UCODE_ID_UVD:
>+		*sram_offset = ;
>+		*sram_addr_reg_offset = ;
>+		break;
>+
>+	case AMDGPU_UCODE_ID_VCE:
>+		*sram_offset = ;
>+		*sram_addr_reg_offset = ;
>+		break;
>+#endif
>+
>+	case AMDGPU_UCODE_ID_MAXIMUM:
>+	default:
>+		ret = -EINVAL;
>+		break;
>+	}
>+
>+	return ret;
>+}
>+
>+static bool psp_v12_0_compare_sram_data(struct psp_context *psp,
>+					struct amdgpu_firmware_info *ucode,
>+					enum AMDGPU_UCODE_ID ucode_type)
>+{
>+	int err = 0;
>+	unsigned int fw_sram_reg_val = 0;
>+	unsigned int fw_sram_addr_reg_offset = 0;
>+	unsigned int fw_sram_data_reg_offset = 0;
>+	unsigned int ucode_size;
>+	uint32_t *ucode_mem = NULL;
>+	struct amdgpu_device *adev = psp->adev;
>+
>+	err = psp_v12_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
>+				 &fw_sram_data_reg_offset, ucode_type);
>+	if (err)
>+		return false;
>+
>+	WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
>+
>+	ucode_size = ucode->ucode_size;
>+	ucode_mem = (uint32_t *)ucode->kaddr;
>+	while (ucode_size) {
>+		fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
>+
>+		if (*ucode_mem != fw_sram_reg_val)
>+			return false;
>+
>+		ucode_mem++;
>+		/* 4 bytes */
>+		ucode_size -= 4;
>+	}
>+
>+	return true;
>+}
>+
>+static int psp_v12_0_mode1_reset(struct psp_context *psp)
>+{
>+	int ret;
>+	uint32_t offset;
>+	struct amdgpu_device *adev = psp->adev;
>+
>+	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
>+
>+	ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
>+
>+	if (ret) {
>+		DRM_INFO("psp is not working correctly before mode1 reset!\n");
>+		return -EINVAL;
>+	}
>+
>+	/* send the mode 1 reset command */
>+	WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
>+
>+	msleep(500);
>+
>+	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
>+
>+	ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
>+
>+	if (ret) {
>+		DRM_INFO("psp mode 1 reset failed!\n");
>+		return -EINVAL;
>+	}
>+
>+	DRM_INFO("psp mode1 reset succeeded\n");
>+
>+	return 0;
>+}
>+
>+static const struct psp_funcs psp_v12_0_funcs = {
>+	.init_microcode = psp_v12_0_init_microcode,
>+	.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
>+	.bootloader_load_sos =
psp_v12_0_bootloader_load_sos, >+ .ring_init = psp_v12_0_ring_init, >+ .ring_create = psp_v12_0_ring_create, >+ .ring_stop = psp_v12_0_ring_stop, >+ .ring_destroy = psp_v12_0_ring_destroy, >+ .cmd_submit = psp_v12_0_cmd_submit, >+ .compare_sram_data = psp_v12_0_compare_sram_data, >+ .mode1_reset = psp_v12_0_mode1_reset, >+}; >+ >+void psp_v12_0_set_psp_funcs(struct psp_context *psp) >+{ >+ psp->funcs = &psp_v12_0_funcs; >+} >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h 2019-08-31 15:01:12.265736204 -0500 >@@ -0,0 +1,30 @@ >+/* >+ * Copyright 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. 
>+ * >+ */ >+#ifndef __PSP_V12_0_H__ >+#define __PSP_V12_0_H__ >+ >+#include "amdgpu_psp.h" >+ >+void psp_v12_0_set_psp_funcs(struct psp_context *psp); >+ >+#endif >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 2019-08-31 15:01:11.850736167 -0500 >@@ -411,7 +411,6 @@ > } > > static int psp_v3_1_cmd_submit(struct psp_context *psp, >- struct amdgpu_firmware_info *ucode, > uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, > int index) > { >@@ -636,7 +635,7 @@ > > static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) > { >- if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) >+ if (amdgpu_sriov_vf(psp->adev)) > return true; > > return false; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 2019-08-31 15:01:11.850736167 -0500 >@@ -34,6 +34,18 @@ > #include "sdma0/sdma0_4_2_sh_mask.h" > #include "sdma1/sdma1_4_2_offset.h" > #include "sdma1/sdma1_4_2_sh_mask.h" >+#include "sdma2/sdma2_4_2_2_offset.h" >+#include "sdma2/sdma2_4_2_2_sh_mask.h" >+#include "sdma3/sdma3_4_2_2_offset.h" >+#include "sdma3/sdma3_4_2_2_sh_mask.h" >+#include "sdma4/sdma4_4_2_2_offset.h" >+#include "sdma4/sdma4_4_2_2_sh_mask.h" >+#include "sdma5/sdma5_4_2_2_offset.h" >+#include "sdma5/sdma5_4_2_2_sh_mask.h" >+#include "sdma6/sdma6_4_2_2_offset.h" >+#include "sdma6/sdma6_4_2_2_sh_mask.h" >+#include "sdma7/sdma7_4_2_2_offset.h" >+#include "sdma7/sdma7_4_2_2_sh_mask.h" > #include "hdp/hdp_4_0_offset.h" > #include "sdma0/sdma0_4_1_default.h" > >@@ -55,6 +67,8 @@ > MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); > MODULE_FIRMWARE("amdgpu/picasso_sdma.bin"); > MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); >+MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin"); >+MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); > > #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L > #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L >@@ -202,25 +216,132 @@ > SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001) > }; > >+static const struct soc15_reg_golden golden_settings_sdma_arct[] = >+{ >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), >+ 
SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), >+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002) >+}; >+ >+static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), >+}; >+ > static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, > u32 instance, u32 offset) > { >- return ( 0 == instance ? 
(adev->reg_offset[SDMA0_HWIP][0][0] + offset) : >- (adev->reg_offset[SDMA1_HWIP][0][0] + offset)); >+ switch (instance) { >+ case 0: >+ return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); >+ case 1: >+ return (adev->reg_offset[SDMA1_HWIP][0][0] + offset); >+ case 2: >+ return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); >+ case 3: >+ return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); >+ case 4: >+ return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); >+ case 5: >+ return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); >+ case 6: >+ return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); >+ case 7: >+ return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); >+ default: >+ break; >+ } >+ return 0; >+} >+ >+static unsigned sdma_v4_0_seq_to_irq_id(int seq_num) >+{ >+ switch (seq_num) { >+ case 0: >+ return SOC15_IH_CLIENTID_SDMA0; >+ case 1: >+ return SOC15_IH_CLIENTID_SDMA1; >+ case 2: >+ return SOC15_IH_CLIENTID_SDMA2; >+ case 3: >+ return SOC15_IH_CLIENTID_SDMA3; >+ case 4: >+ return SOC15_IH_CLIENTID_SDMA4; >+ case 5: >+ return SOC15_IH_CLIENTID_SDMA5; >+ case 6: >+ return SOC15_IH_CLIENTID_SDMA6; >+ case 7: >+ return SOC15_IH_CLIENTID_SDMA7; >+ default: >+ break; >+ } >+ return -EINVAL; >+} >+ >+static int sdma_v4_0_irq_id_to_seq(unsigned client_id) >+{ >+ switch (client_id) { >+ case SOC15_IH_CLIENTID_SDMA0: >+ return 0; >+ case SOC15_IH_CLIENTID_SDMA1: >+ return 1; >+ case SOC15_IH_CLIENTID_SDMA2: >+ return 2; >+ case SOC15_IH_CLIENTID_SDMA3: >+ return 3; >+ case SOC15_IH_CLIENTID_SDMA4: >+ return 4; >+ case SOC15_IH_CLIENTID_SDMA5: >+ return 5; >+ case SOC15_IH_CLIENTID_SDMA6: >+ return 6; >+ case SOC15_IH_CLIENTID_SDMA7: >+ return 7; >+ default: >+ break; >+ } >+ return -EINVAL; > } > > static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) > { > switch (adev->asic_type) { > case CHIP_VEGA10: >- if (!amdgpu_virt_support_skip_setting(adev)) { >- soc15_program_register_sequence(adev, >- golden_settings_sdma_4, >- ARRAY_SIZE(golden_settings_sdma_4)); >- soc15_program_register_sequence(adev, >- golden_settings_sdma_vg10, >- ARRAY_SIZE(golden_settings_sdma_vg10)); >- } >+ soc15_program_register_sequence(adev, >+ golden_settings_sdma_4, >+ ARRAY_SIZE(golden_settings_sdma_4)); >+ soc15_program_register_sequence(adev, >+ golden_settings_sdma_vg10, >+ ARRAY_SIZE(golden_settings_sdma_vg10)); > break; > case CHIP_VEGA12: > soc15_program_register_sequence(adev, >@@ -241,6 +362,11 @@ > golden_settings_sdma1_4_2, > ARRAY_SIZE(golden_settings_sdma1_4_2)); > break; >+ case CHIP_ARCTURUS: >+ soc15_program_register_sequence(adev, >+ golden_settings_sdma_arct, >+ ARRAY_SIZE(golden_settings_sdma_arct)); >+ break; > case CHIP_RAVEN: > soc15_program_register_sequence(adev, > golden_settings_sdma_4_1, >@@ -254,11 +380,53 @@ > golden_settings_sdma_rv1, > ARRAY_SIZE(golden_settings_sdma_rv1)); > break; >+ case CHIP_RENOIR: >+ soc15_program_register_sequence(adev, >+ golden_settings_sdma_4_3, >+ ARRAY_SIZE(golden_settings_sdma_4_3)); >+ break; > default: > break; > } > } > >+static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) >+{ >+ int err = 0; >+ const struct sdma_firmware_header_v1_0 *hdr; >+ >+ err = amdgpu_ucode_validate(sdma_inst->fw); >+ if (err) >+ return err; >+ >+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; >+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); >+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); >+ >+ if (sdma_inst->feature_version >= 20) >+ sdma_inst->burst_nop = true; >+ >+ return 0; 
>+}
>+
>+static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
>+{
>+	int i;
>+
>+	for (i = 0; i < adev->sdma.num_instances; i++) {
>+		if (adev->sdma.instance[i].fw != NULL)
>+			release_firmware(adev->sdma.instance[i].fw);
>+
>+		/* arcturus shares the same FW memory across
>+		   all SDMA instances */
>+		if (adev->asic_type == CHIP_ARCTURUS)
>+			break;
>+	}
>+
>+	memset((void *)adev->sdma.instance, 0,
>+	       sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
>+}
>+
> /**
> * sdma_v4_0_init_microcode - load ucode images from disk
> *
>@@ -278,7 +446,6 @@
> 	int err = 0, i;
> 	struct amdgpu_firmware_info *info = NULL;
> 	const struct common_firmware_header *header = NULL;
>-	const struct sdma_firmware_header_v1_0 *hdr;
> 
> 	DRM_DEBUG("\n");
> 
>@@ -300,30 +467,52 @@
> 		else
> 			chip_name = "raven";
> 		break;
>+	case CHIP_ARCTURUS:
>+		chip_name = "arcturus";
>+		break;
>+	case CHIP_RENOIR:
>+		chip_name = "renoir";
>+		break;
> 	default:
> 		BUG();
> 	}
> 
>-	for (i = 0; i < adev->sdma.num_instances; i++) {
>-		if (i == 0)
>-			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
>-		else
>-			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
>-		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
>-		if (err)
>-			goto out;
>-		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
>-		if (err)
>-			goto out;
>-		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
>-		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
>-		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
>-		if (adev->sdma.instance[i].feature_version >= 20)
>-			adev->sdma.instance[i].burst_nop = true;
>-		DRM_DEBUG("psp_load == '%s'\n",
>-			adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
>+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
>+
>+	err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
>+	if (err)
>+		goto out;
>+
>+	err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
>+	if (err)
>+		goto out;
>+
>+	for (i = 1; i < adev->sdma.num_instances; i++) {
>+		if (adev->asic_type == CHIP_ARCTURUS) {
>+			/* Arcturus will leverage the same FW memory
>+			   for every SDMA instance */
>+			memcpy((void *)&adev->sdma.instance[i],
>+			       (void *)&adev->sdma.instance[0],
>+			       sizeof(struct amdgpu_sdma_instance));
>+		} else {
>+			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
>+
>+			err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
>+			if (err)
>+				goto out;
>+
>+			err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
>+			if (err)
>+				goto out;
>+		}
>+	}
> 
>-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>+	DRM_DEBUG("psp_load == '%s'\n",
>+		  adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
"true" : "false"); >+ >+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { >+ for (i = 0; i < adev->sdma.num_instances; i++) { > info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; > info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; > info->fw = adev->sdma.instance[i].fw; >@@ -332,13 +521,11 @@ > ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); > } > } >+ > out: > if (err) { > DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); >- for (i = 0; i < adev->sdma.num_instances; i++) { >- release_firmware(adev->sdma.instance[i].fw); >- adev->sdma.instance[i].fw = NULL; >- } >+ sdma_v4_0_destroy_inst_ctx(adev); > } > return err; > } >@@ -561,10 +748,7 @@ > u32 ref_and_mask = 0; > const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; > >- if (ring->me == 0) >- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; >- else >- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; >+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; > > sdma_v4_0_wait_reg_mem(ring, 0, 1, > adev->nbio_funcs->get_hdp_flush_done_offset(adev), >@@ -620,26 +804,27 @@ > */ > static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) > { >- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; >- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; >+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; > u32 rb_cntl, ib_cntl; >- int i; >+ int i, unset = 0; >+ >+ for (i = 0; i < adev->sdma.num_instances; i++) { >+ sdma[i] = &adev->sdma.instance[i].ring; > >- if ((adev->mman.buffer_funcs_ring == sdma0) || >- (adev->mman.buffer_funcs_ring == sdma1)) >+ if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { > amdgpu_ttm_set_buffer_funcs_status(adev, false); >+ unset = 1; >+ } > >- for (i = 0; i < adev->sdma.num_instances; i++) { > rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); > rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); > WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); > ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); > ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); > WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); >- } > >- sdma0->sched.ready = false; >- sdma1->sched.ready = false; >+ sdma[i]->sched.ready = false; >+ } > } > > /** >@@ -663,16 +848,20 @@ > */ > static void sdma_v4_0_page_stop(struct amdgpu_device *adev) > { >- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; >- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; >+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; > u32 rb_cntl, ib_cntl; > int i; >- >- if ((adev->mman.buffer_funcs_ring == sdma0) || >- (adev->mman.buffer_funcs_ring == sdma1)) >- amdgpu_ttm_set_buffer_funcs_status(adev, false); >+ bool unset = false; > > for (i = 0; i < adev->sdma.num_instances; i++) { >+ sdma[i] = &adev->sdma.instance[i].page; >+ >+ if ((adev->mman.buffer_funcs_ring == sdma[i]) && >+ (unset == false)) { >+ amdgpu_ttm_set_buffer_funcs_status(adev, false); >+ unset = true; >+ } >+ > rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); > rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, > RB_ENABLE, 0); >@@ -681,10 +870,9 @@ > ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, > IB_ENABLE, 0); > WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); >- } > >- sdma0->sched.ready = false; >- sdma1->sched.ready = false; >+ sdma[i]->sched.ready = false; >+ } > } > > /** >@@ -1018,6 +1206,7 @@ > > switch (adev->asic_type) { > case CHIP_RAVEN: >+ case CHIP_RENOIR: > sdma_v4_1_init_power_gating(adev); > sdma_v4_1_update_power_gating(adev, true); > break; >@@ -1473,8 +1662,10 
@@ > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > int r; > >- if (adev->asic_type == CHIP_RAVEN) >+ if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) > adev->sdma.num_instances = 1; >+ else if (adev->asic_type == CHIP_ARCTURUS) >+ adev->sdma.num_instances = 8; > else > adev->sdma.num_instances = 2; > >@@ -1499,6 +1690,7 @@ > } > > static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, >+ struct ras_err_data *err_data, > struct amdgpu_iv_entry *entry); > > static int sdma_v4_0_late_init(void *handle) >@@ -1518,7 +1710,7 @@ > .sub_block_index = 0, > .name = "sdma", > }; >- int r; >+ int r, i; > > if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { > amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); >@@ -1575,14 +1767,11 @@ > if (r) > goto sysfs; > resume: >- r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); >- if (r) >- goto irq; >- >- r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); >- if (r) { >- amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); >- goto irq; >+ for (i = 0; i < adev->sdma.num_instances; i++) { >+ r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, >+ AMDGPU_SDMA_IRQ_INSTANCE0 + i); >+ if (r) >+ goto irq; > } > > return 0; >@@ -1606,28 +1795,22 @@ > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > /* SDMA trap event */ >- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP, >- &adev->sdma.trap_irq); >- if (r) >- return r; >- >- /* SDMA trap event */ >- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP, >- &adev->sdma.trap_irq); >- if (r) >- return r; >- >- /* SDMA SRAM ECC event */ >- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC, >- &adev->sdma.ecc_irq); >- if (r) >- return r; >+ for (i = 0; i < adev->sdma.num_instances; i++) { >+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), >+ SDMA0_4_0__SRCID__SDMA_TRAP, >+ &adev->sdma.trap_irq); >+ if (r) >+ return r; >+ } > > /* SDMA SRAM ECC event */ >- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC, >- &adev->sdma.ecc_irq); >- if (r) >- return r; >+ for (i = 0; i < adev->sdma.num_instances; i++) { >+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), >+ SDMA0_4_0__SRCID__SDMA_SRAM_ECC, >+ &adev->sdma.ecc_irq); >+ if (r) >+ return r; >+ } > > for (i = 0; i < adev->sdma.num_instances; i++) { > ring = &adev->sdma.instance[i].ring; >@@ -1641,11 +1824,8 @@ > ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; > > sprintf(ring->name, "sdma%d", i); >- r = amdgpu_ring_init(adev, ring, 1024, >- &adev->sdma.trap_irq, >- (i == 0) ? >- AMDGPU_SDMA_IRQ_INSTANCE0 : >- AMDGPU_SDMA_IRQ_INSTANCE1); >+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, >+ AMDGPU_SDMA_IRQ_INSTANCE0 + i); > if (r) > return r; > >@@ -1663,9 +1843,7 @@ > sprintf(ring->name, "page%d", i); > r = amdgpu_ring_init(adev, ring, 1024, > &adev->sdma.trap_irq, >- (i == 0) ? 
>- AMDGPU_SDMA_IRQ_INSTANCE0 : >- AMDGPU_SDMA_IRQ_INSTANCE1); >+ AMDGPU_SDMA_IRQ_INSTANCE0 + i); > if (r) > return r; > } >@@ -1701,10 +1879,7 @@ > amdgpu_ring_fini(&adev->sdma.instance[i].page); > } > >- for (i = 0; i < adev->sdma.num_instances; i++) { >- release_firmware(adev->sdma.instance[i].fw); >- adev->sdma.instance[i].fw = NULL; >- } >+ sdma_v4_0_destroy_inst_ctx(adev); > > return 0; > } >@@ -1718,7 +1893,8 @@ > adev->powerplay.pp_funcs->set_powergating_by_smu) > amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); > >- sdma_v4_0_init_golden_registers(adev); >+ if (!amdgpu_sriov_vf(adev)) >+ sdma_v4_0_init_golden_registers(adev); > > r = sdma_v4_0_start(adev); > >@@ -1728,12 +1904,15 @@ > static int sdma_v4_0_hw_fini(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ int i; > > if (amdgpu_sriov_vf(adev)) > return 0; > >- amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); >- amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); >+ for (i = 0; i < adev->sdma.num_instances; i++) { >+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq, >+ AMDGPU_SDMA_IRQ_INSTANCE0 + i); >+ } > > sdma_v4_0_ctx_switch_enable(adev, false); > sdma_v4_0_enable(adev, false); >@@ -1776,15 +1955,17 @@ > > static int sdma_v4_0_wait_for_idle(void *handle) > { >- unsigned i; >- u32 sdma0, sdma1; >+ unsigned i, j; >+ u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > for (i = 0; i < adev->usec_timeout; i++) { >- sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); >- sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); >- >- if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) >+ for (j = 0; j < adev->sdma.num_instances; j++) { >+ sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG); >+ if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK)) >+ break; >+ } >+ if (j == adev->sdma.num_instances) > return 0; > udelay(1); > } >@@ -1820,17 +2001,7 @@ > uint32_t instance; > > DRM_DEBUG("IH: SDMA trap\n"); >- switch (entry->client_id) { >- case SOC15_IH_CLIENTID_SDMA0: >- instance = 0; >- break; >- case SOC15_IH_CLIENTID_SDMA1: >- instance = 1; >- break; >- default: >- return 0; >- } >- >+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id); > switch (entry->ring_id) { > case 0: > amdgpu_fence_process(&adev->sdma.instance[instance].ring); >@@ -1851,20 +2022,15 @@ > } > > static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, >+ struct ras_err_data *err_data, > struct amdgpu_iv_entry *entry) > { >- uint32_t instance, err_source; >+ uint32_t err_source; >+ int instance; > >- switch (entry->client_id) { >- case SOC15_IH_CLIENTID_SDMA0: >- instance = 0; >- break; >- case SOC15_IH_CLIENTID_SDMA1: >- instance = 1; >- break; >- default: >+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id); >+ if (instance < 0) > return 0; >- } > > switch (entry->src_id) { > case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: >@@ -1881,7 +2047,7 @@ > > amdgpu_ras_reset_gpu(adev, 0); > >- return AMDGPU_RAS_UE; >+ return AMDGPU_RAS_SUCCESS; > } > > static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev, >@@ -1910,16 +2076,9 @@ > > DRM_ERROR("Illegal instruction in SDMA command stream\n"); > >- switch (entry->client_id) { >- case SOC15_IH_CLIENTID_SDMA0: >- instance = 0; >- break; >- case SOC15_IH_CLIENTID_SDMA1: >- instance = 1; >- break; >- default: >+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id); >+ if (instance < 0) > return 0; >- } > > switch (entry->ring_id) { > case 0: >@@ -1936,14 +2095,10 @@ > { > u32 sdma_edc_config; > >- u32 
reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ? >- sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) : >- sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG); >- >- sdma_edc_config = RREG32(reg_offset); >+ sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG); > sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE, > state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); >- WREG32(reg_offset, sdma_edc_config); >+ WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config); > > return 0; > } >@@ -1953,61 +2108,35 @@ > bool enable) > { > uint32_t data, def; >+ int i; > > if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { >- /* enable sdma0 clock gating */ >- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); >- data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); >- if (def != data) >- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); >- >- if (adev->sdma.num_instances > 1) { >- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); >- data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); >+ for (i = 0; i < adev->sdma.num_instances; i++) { >+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); >+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); > if (def != data) >- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); >+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data); > } > } else { >- /* disable sdma0 clock gating */ >- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); >- data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | >- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); >- >- if (def != data) >- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); >- >- if (adev->sdma.num_instances > 1) { >- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); >- data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | >- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); >+ for (i = 0; i < adev->sdma.num_instances; i++) { >+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); >+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | >+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | >+ 
SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
>+				  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
> 			if (def != data)
>-				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
>+				WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
> 		}
> 	}
> }
>@@ -2018,34 +2147,23 @@
> 		bool enable)
> {
> 	uint32_t data, def;
>+	int i;
> 
> 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
>-		/* 1-not override: enable sdma0 mem light sleep */
>-		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
>-		data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
>-		if (def != data)
>-			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
>-
>-		/* 1-not override: enable sdma1 mem light sleep */
>-		if (adev->sdma.num_instances > 1) {
>-			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
>-			data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
>+		for (i = 0; i < adev->sdma.num_instances; i++) {
>+			/* 1-not override: enable sdma mem light sleep */
>+			def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL);
>+			data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
> 			if (def != data)
>-				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
>+				WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data);
> 		}
> 	} else {
>-		/* 0-override:disable sdma0 mem light sleep */
>-		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
>-		data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
>-		if (def != data)
>-			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
>-
>-		/* 0-override:disable sdma1 mem light sleep */
>-		if (adev->sdma.num_instances > 1) {
>-			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
>-			data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
>+		for (i = 0; i < adev->sdma.num_instances; i++) {
>+			/* 0-override: disable sdma mem light sleep */
>+			def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL);
>+			data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
> 			if (def != data)
>-				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
>+				WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data);
> 		}
> 	}
> }
>@@ -2063,6 +2181,8 @@
> 	case CHIP_VEGA12:
> 	case CHIP_VEGA20:
> 	case CHIP_RAVEN:
>+	case CHIP_ARCTURUS:
>+	case CHIP_RENOIR:
> 		sdma_v4_0_update_medium_grain_clock_gating(adev,
> 				state == AMD_CG_STATE_GATE ? true : false);
> 		sdma_v4_0_update_medium_grain_light_sleep(adev,
>@@ -2133,7 +2253,43 @@
> 	.align_mask = 0xf,
> 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
> 	.support_64bit_ptrs = true,
>-	.vmhub = AMDGPU_MMHUB,
>+	.vmhub = AMDGPU_MMHUB_0,
>+	.get_rptr = sdma_v4_0_ring_get_rptr,
>+	.get_wptr = sdma_v4_0_ring_get_wptr,
>+	.set_wptr = sdma_v4_0_ring_set_wptr,
>+	.emit_frame_size =
>+		6 + /* sdma_v4_0_ring_emit_hdp_flush */
>+		3 + /* hdp invalidate */
>+		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
>+		/* sdma_v4_0_ring_emit_vm_flush */
>+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
>+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
>+		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
>+	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
>+	.emit_ib = sdma_v4_0_ring_emit_ib,
>+	.emit_fence = sdma_v4_0_ring_emit_fence,
>+	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
>+	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
>+	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
>+	.test_ring = sdma_v4_0_ring_test_ring,
>+	.test_ib = sdma_v4_0_ring_test_ib,
>+	.insert_nop = sdma_v4_0_ring_insert_nop,
>+	.pad_ib = sdma_v4_0_ring_pad_ib,
>+	.emit_wreg = sdma_v4_0_ring_emit_wreg,
>+	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
>+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
>+};
>+
>+/*
>+ * On Arcturus, SDMA instances 5~7 have a different vmhub type (AMDGPU_MMHUB_1),
>+ * so create individual constant ring_funcs for those instances.
>+ */
>+static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
>+	.type = AMDGPU_RING_TYPE_SDMA,
>+	.align_mask = 0xf,
>+	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
>+	.support_64bit_ptrs = true,
>+	.vmhub = AMDGPU_MMHUB_1,
> 	.get_rptr = sdma_v4_0_ring_get_rptr,
> 	.get_wptr = sdma_v4_0_ring_get_wptr,
> 	.set_wptr = sdma_v4_0_ring_set_wptr,
>@@ -2165,7 +2321,39 @@
> 	.align_mask = 0xf,
> 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
> 	.support_64bit_ptrs = true,
>-	.vmhub = AMDGPU_MMHUB,
>+	.vmhub = AMDGPU_MMHUB_0,
>+	.get_rptr = sdma_v4_0_ring_get_rptr,
>+	.get_wptr = sdma_v4_0_page_ring_get_wptr,
>+	.set_wptr = sdma_v4_0_page_ring_set_wptr,
>+	.emit_frame_size =
>+		6 + /* sdma_v4_0_ring_emit_hdp_flush */
>+		3 + /* hdp invalidate */
>+		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
>+		/* sdma_v4_0_ring_emit_vm_flush */
>+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
>+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
>+		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
>+	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
>+	.emit_ib = sdma_v4_0_ring_emit_ib,
>+	.emit_fence = sdma_v4_0_ring_emit_fence,
>+	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
>+	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
>+	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
>+	.test_ring = sdma_v4_0_ring_test_ring,
>+	.test_ib = sdma_v4_0_ring_test_ib,
>+	.insert_nop = sdma_v4_0_ring_insert_nop,
>+	.pad_ib = sdma_v4_0_ring_pad_ib,
>+	.emit_wreg = sdma_v4_0_ring_emit_wreg,
>+	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
>+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
>+};
>+
>+static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
>+	.type = AMDGPU_RING_TYPE_SDMA,
>+	.align_mask = 0xf,
>+	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
>+	.support_64bit_ptrs = true,
>+	.vmhub = AMDGPU_MMHUB_1,
> 	.get_rptr = sdma_v4_0_ring_get_rptr,
> 	.get_wptr = sdma_v4_0_page_ring_get_wptr,
> 	.set_wptr = sdma_v4_0_page_ring_set_wptr,
>@@ -2197,10 +2385,20 @@
> 	int i;
> 
> 	for (i = 0; i <
adev->sdma.num_instances; i++) { >- adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; >+ if (adev->asic_type == CHIP_ARCTURUS && i >= 5) >+ adev->sdma.instance[i].ring.funcs = >+ &sdma_v4_0_ring_funcs_2nd_mmhub; >+ else >+ adev->sdma.instance[i].ring.funcs = >+ &sdma_v4_0_ring_funcs; > adev->sdma.instance[i].ring.me = i; > if (adev->sdma.has_page_queue) { >- adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; >+ if (adev->asic_type == CHIP_ARCTURUS && i >= 5) >+ adev->sdma.instance[i].page.funcs = >+ &sdma_v4_0_page_ring_funcs_2nd_mmhub; >+ else >+ adev->sdma.instance[i].page.funcs = >+ &sdma_v4_0_page_ring_funcs; > adev->sdma.instance[i].page.me = i; > } > } >@@ -2224,10 +2422,23 @@ > > static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) > { >- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; >+ switch (adev->sdma.num_instances) { >+ case 1: >+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; >+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; >+ break; >+ case 8: >+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; >+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; >+ break; >+ case 2: >+ default: >+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; >+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; >+ break; >+ } > adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; > adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs; >- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; > adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs; > } > >@@ -2293,8 +2504,8 @@ > static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) > { > adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; >- if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) >- adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page; >+ if (adev->sdma.has_page_queue) >+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; > else > adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; > } >@@ -2313,22 +2524,15 @@ > unsigned i; > > adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; >- if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) { >- for (i = 1; i < adev->sdma.num_instances; i++) { >+ for (i = 0; i < adev->sdma.num_instances; i++) { >+ if (adev->sdma.has_page_queue) > sched = &adev->sdma.instance[i].page.sched; >- adev->vm_manager.vm_pte_rqs[i - 1] = >- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; >- } >- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances - 1; >- adev->vm_manager.page_fault = &adev->sdma.instance[0].page; >- } else { >- for (i = 0; i < adev->sdma.num_instances; i++) { >+ else > sched = &adev->sdma.instance[i].ring.sched; >- adev->vm_manager.vm_pte_rqs[i] = >- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; >- } >- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; >+ adev->vm_manager.vm_pte_rqs[i] = >+ &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; > } >+ adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; > } > > const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 2019-08-31 15:01:11.850736167 -0500 >@@ -21,8 +21,11 @@ > * > */ > >+#include <linux/delay.h> > #include <linux/firmware.h> >-#include <drm/drmP.h> >+#include <linux/module.h> 
>+#include <linux/pci.h> >+ > #include "amdgpu.h" > #include "amdgpu_ucode.h" > #include "amdgpu_trace.h" >@@ -42,6 +45,12 @@ > MODULE_FIRMWARE("amdgpu/navi10_sdma.bin"); > MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin"); > >+MODULE_FIRMWARE("amdgpu/navi14_sdma.bin"); >+MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); >+ >+MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); >+MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); >+ > #define SDMA1_REG_OFFSET 0x600 > #define SDMA0_HYP_DEC_REG_START 0x5880 > #define SDMA0_HYP_DEC_REG_END 0x5893 >@@ -59,7 +68,7 @@ > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >@@ -71,7 +80,7 @@ > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >@@ -80,6 +89,18 @@ > }; > > static const struct soc15_reg_golden golden_settings_sdma_nv10[] = { >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), >+}; >+ >+static const struct soc15_reg_golden golden_settings_sdma_nv14[] = { >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >+}; >+ >+static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), >+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), > }; > > static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) >@@ -111,6 +132,22 @@ > golden_settings_sdma_nv10, > (const u32)ARRAY_SIZE(golden_settings_sdma_nv10)); > break; >+ case CHIP_NAVI14: >+ soc15_program_register_sequence(adev, >+ golden_settings_sdma_5, >+ (const u32)ARRAY_SIZE(golden_settings_sdma_5)); >+ soc15_program_register_sequence(adev, >+ golden_settings_sdma_nv14, >+ (const u32)ARRAY_SIZE(golden_settings_sdma_nv14)); >+ break; >+ case CHIP_NAVI12: >+ soc15_program_register_sequence(adev, >+ golden_settings_sdma_5, >+ (const u32)ARRAY_SIZE(golden_settings_sdma_5)); >+ soc15_program_register_sequence(adev, >+ golden_settings_sdma_nv12, >+ (const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); >+ break; > 
default: > break; > } >@@ -143,6 +180,12 @@ > case CHIP_NAVI10: > chip_name = "navi10"; > break; >+ case CHIP_NAVI14: >+ chip_name = "navi14"; >+ break; >+ case CHIP_NAVI12: >+ chip_name = "navi12"; >+ break; > default: > BUG(); > } >@@ -861,7 +904,7 @@ > if (amdgpu_emu_mode == 1) > msleep(1); > else >- DRM_UDELAY(1); >+ udelay(1); > } > > if (i < adev->usec_timeout) { >@@ -1316,7 +1359,7 @@ > if (ring->trail_seq == > le32_to_cpu(*(ring->trail_fence_cpu_addr))) > break; >- DRM_UDELAY(1); >+ udelay(1); > } > > if (i >= adev->usec_timeout) { >@@ -1472,6 +1515,8 @@ > > switch (adev->asic_type) { > case CHIP_NAVI10: >+ case CHIP_NAVI14: >+ case CHIP_NAVI12: > sdma_v5_0_update_medium_grain_clock_gating(adev, > state == AMD_CG_STATE_GATE ? true : false); > sdma_v5_0_update_medium_grain_light_sleep(adev, >@@ -1532,7 +1577,7 @@ > .align_mask = 0xf, > .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), > .support_64bit_ptrs = true, >- .vmhub = AMDGPU_GFXHUB, >+ .vmhub = AMDGPU_GFXHUB_0, > .get_rptr = sdma_v5_0_ring_get_rptr, > .get_wptr = sdma_v5_0_ring_get_wptr, > .set_wptr = sdma_v5_0_ring_set_wptr, >@@ -1583,7 +1628,8 @@ > > static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) > { >- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; >+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + >+ adev->sdma.num_instances; > adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs; > adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs; > } >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/si.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/si.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/si.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/si.c 2019-08-31 15:01:11.850736167 -0500 >@@ -1186,6 +1186,12 @@ > return 0; > } > >+static enum amd_reset_method >+si_asic_reset_method(struct amdgpu_device *adev) >+{ >+ return AMD_RESET_METHOD_LEGACY; >+} >+ > static u32 si_get_config_memsize(struct amdgpu_device *adev) > { > return RREG32(mmCONFIG_MEMSIZE); >@@ -1394,6 +1400,7 @@ > .read_bios_from_rom = &si_read_bios_from_rom, > .read_register = &si_read_register, > .reset = &si_asic_reset, >+ .reset_method = &si_asic_reset_method, > .set_vga_state = &si_vga_set_state, > .get_xclk = &si_get_xclk, > .set_uvd_clocks = &si_set_uvd_clocks, >@@ -1881,7 +1888,7 @@ > if (orig != data) > si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); > >- if ((adev->family != CHIP_OLAND) && (adev->family != CHIP_HAINAN)) { >+ if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) { > orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); > data &= ~PLL_RAMP_UP_TIME_0_MASK; > if (orig != data) >@@ -1930,14 +1937,14 @@ > > orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL); > data &= ~LS2_EXIT_TIME_MASK; >- if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) >+ if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) > data |= LS2_EXIT_TIME(5); > if (orig != data) > si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data); > > orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL); > data &= ~LS2_EXIT_TIME_MASK; >- if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) >+ if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) > data |= LS2_EXIT_TIME(5); > if (orig != data) > si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c >--- 
linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c 2019-08-31 15:01:11.850736167 -0500 >@@ -0,0 +1,724 @@ >+/* >+ * Copyright 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. >+ * >+ */ >+ >+#include "smuio/smuio_11_0_0_offset.h" >+#include "smuio/smuio_11_0_0_sh_mask.h" >+ >+#include "smu_v11_0_i2c.h" >+#include "amdgpu.h" >+#include "soc15_common.h" >+#include <drm/drm_fixed.h> >+#include <drm/drm_drv.h> >+#include "amdgpu_amdkfd.h" >+#include <linux/i2c.h> >+#include <linux/pci.h> >+#include "amdgpu_ras.h" >+ >+/* error codes */ >+#define I2C_OK 0 >+#define I2C_NAK_7B_ADDR_NOACK 1 >+#define I2C_NAK_TXDATA_NOACK 2 >+#define I2C_TIMEOUT 4 >+#define I2C_SW_TIMEOUT 8 >+#define I2C_ABORT 0x10 >+ >+/* I2C transaction flags */ >+#define I2C_NO_STOP 1 >+#define I2C_RESTART 2 >+ >+#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control.eeprom_accessor))->adev >+#define to_eeprom_control(x) container_of(x, struct amdgpu_ras_eeprom_control, eeprom_accessor) >+ >+static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) >+{ >+ struct amdgpu_device *adev = to_amdgpu_device(control); >+ uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT); >+ >+ reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 1 : 0); >+ WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg); >+} >+ >+ >+static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) >+{ >+ struct amdgpu_device *adev = to_amdgpu_device(control); >+ >+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 
1 : 0);
>+}
>+
>+static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	/* do */
>+	{
>+		RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR);
>+
>+	} /* while (reg_CKSVII2C_ic_clr_intr == 0) */
>+}
>+
>+static void smu_v11_0_i2c_configure(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	uint32_t reg = 0;
>+
>+	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1);
>+	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1);
>+	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0);
>+	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0);
>+	/* Standard mode */
>+	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2);
>+	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1);
>+
>+	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg);
>+}
>+
>+static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+
>+	/*
>+	 * Standard mode speed. These values are taken from the SMUIO MAS,
>+	 * but differ from what is given in the Synopsys spec.
>+	 * The values here are based on the assumption
>+	 * that the refclock is 100MHz.
>+	 *
>+	 * Configuration for standard mode; Speed = 100kbps
>+	 * Scale linearly, for now only support standard speed clock
>+	 * This will work only with 100M ref clock
>+	 *
>+	 * TBD: Change the calculation to take into account ref clock values also.
>+	 */
>+
>+	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_FS_SPKLEN, 2);
>+	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_HCNT, 120);
>+	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_LCNT, 130);
>+	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20);
>+}
>+
>+static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+
>+	/* Convert from 8-bit to 7-bit address */
>+	address >>= 1;
>+	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF));
>+}
>+
>+static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	uint32_t ret = I2C_OK;
>+	uint32_t reg, reg_c_tx_abrt_source;
>+
>+	/* Check if transmission is completed */
>+	unsigned long timeout_counter = jiffies + msecs_to_jiffies(20);
>+
>+	do {
>+		if (time_after(jiffies, timeout_counter)) {
>+			ret |= I2C_SW_TIMEOUT;
>+			break;
>+		}
>+
>+		reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
>+
>+	} while (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFE) == 0);
>+
>+	if (ret != I2C_OK)
>+		return ret;
>+
>+	/* This only checks if a NAK is received and the transaction got aborted */
>+	reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_INTR_STAT);
>+
>+	if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) {
>+		reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE);
>+		DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source);
>+
>+		/* Check for stop due to NACK */
>+		if (REG_GET_FIELD(reg_c_tx_abrt_source,
>+				  CKSVII2C_IC_TX_ABRT_SOURCE,
>+				  ABRT_TXDATA_NOACK) == 1) {
>+
>+			ret |= I2C_NAK_TXDATA_NOACK;
>+
>+		} else if (REG_GET_FIELD(reg_c_tx_abrt_source,
>+					 CKSVII2C_IC_TX_ABRT_SOURCE,
>+					 ABRT_7B_ADDR_NOACK) == 1) {
>+
>+			ret |= I2C_NAK_7B_ADDR_NOACK;
>+		} else {
>+			ret |= I2C_ABORT;
>+		}
>+
>+		smu_v11_0_i2c_clear_status(control);
>+	}
>+
>+	return ret;
>+}
>+
>+static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	uint32_t ret = I2C_OK;
>+	uint32_t reg_ic_status, reg_c_tx_abrt_source;
>+
>+	reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE);
>+
>+	/* If slave is not present */
>+	if (REG_GET_FIELD(reg_c_tx_abrt_source,
>+			  CKSVII2C_IC_TX_ABRT_SOURCE,
>+			  ABRT_7B_ADDR_NOACK) == 1) {
>+		ret |= I2C_NAK_7B_ADDR_NOACK;
>+
>+		smu_v11_0_i2c_clear_status(control);
>+	} else { /* wait until some data arrives in the RXFIFO */
>+		/* Poll for some byte in RXFIFO */
>+		unsigned long timeout_counter = jiffies + msecs_to_jiffies(20);
>+
>+		do {
>+			if (time_after(jiffies, timeout_counter)) {
>+				ret |= I2C_SW_TIMEOUT;
>+				break;
>+			}
>+
>+			reg_ic_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
>+
>+		} while (REG_GET_FIELD(reg_ic_status, CKSVII2C_IC_STATUS, RFNE) == 0);
>+	}
>+
>+	return ret;
>+}
>+
>+
>+
>+
>+/**
>+ * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device.
>+ *
>+ * @control: The I2C adapter backing the SMU I2C engine.
>+ * @address: The I2C address of the slave device.
>+ * @data: The data to transmit over the bus.
>+ * @numbytes: The amount of data to transmit.
>+ * @i2c_flag: Flags for transmission
>+ *
>+ * Returns I2C_OK on success, or a bitmask of I2C_* error codes otherwise.
>+ */
>+static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
>+				       uint8_t address, uint8_t *data,
>+				       uint32_t numbytes, uint32_t i2c_flag)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	uint32_t bytes_sent, reg, ret = 0;
>+	unsigned long timeout_counter;
>+
>+	bytes_sent = 0;
>+
>+	DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ",
>+			 (uint16_t)address, numbytes);
>+
>+	if (drm_debug & DRM_UT_DRIVER) {
>+		print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
>+			       16, 1, data, numbytes, false);
>+	}
>+
>+	/* Set the I2C slave address */
>+	smu_v11_0_i2c_set_address(control, address);
>+	/* Enable I2C */
>+	smu_v11_0_i2c_enable(control, true);
>+
>+	/* Clear status bits */
>+	smu_v11_0_i2c_clear_status(control);
>+
>+
>+	timeout_counter = jiffies + msecs_to_jiffies(20);
>+
>+	while (numbytes > 0) {
>+		reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
>+		if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) {
>+			do {
>+				reg = 0;
>+				/*
>+				 * Prepare transaction, no need to set RESTART. I2C engine will send
>+				 * START as soon as it sees data in TXFIFO
>+				 */
>+				if (bytes_sent == 0)
>+					reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART,
>+							    (i2c_flag & I2C_RESTART) ? 1 : 0);
>+				reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]);
>+
>+				/* determine if we need to send STOP bit or not */
>+				if (numbytes == 1)
>+					/* Final transaction, so send stop unless I2C_NO_STOP */
>+					reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP,
>+							    (i2c_flag & I2C_NO_STOP) ? 0 : 1);
>+				/* Write */
>+				reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0);
>+				WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
>+
>+				/* Record that the bytes were transmitted */
>+				bytes_sent++;
>+				numbytes--;
>+
>+				reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
>+
>+			} while (numbytes && REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF));
>+		}
>+
>+		/*
>+		 * We waited too long for the transmission FIFO to become not-full.
>+		 * Exit the loop with error.
>+		 */
>+		if (time_after(jiffies, timeout_counter)) {
>+			ret |= I2C_SW_TIMEOUT;
>+			goto Err;
>+		}
>+	}
>+
>+	ret = smu_v11_0_i2c_poll_tx_status(control);
>+
>+Err:
>+	/* Any error, no point in proceeding */
>+	if (ret != I2C_OK) {
>+		if (ret & I2C_SW_TIMEOUT)
>+			DRM_ERROR("TIMEOUT ERROR !!!");
>+
>+		if (ret & I2C_NAK_7B_ADDR_NOACK)
>+			DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!");
>+
>+
>+		if (ret & I2C_NAK_TXDATA_NOACK)
>+			DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!");
>+	}
>+
>+	return ret;
>+}
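Note that smu_v11_0_i2c_transmit() (and smu_v11_0_i2c_receive() below) return a bitmask assembled from the I2C_* codes defined at the top of this file, not a plain errno. A minimal caller-side sketch, not part of the patch (addr, buf and len are hypothetical):

	uint32_t ret = smu_v11_0_i2c_transmit(control, addr, buf, len, 0);
	if (ret != I2C_OK) {
		if (ret & I2C_SW_TIMEOUT)
			; /* the TX FIFO never drained within the 20 ms software timeout */
		if (ret & (I2C_NAK_7B_ADDR_NOACK | I2C_NAK_TXDATA_NOACK))
			; /* the slave NAKed its address or a data byte */
	}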
>+
>+
>+/**
>+ * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device.
>+ *
>+ * @control: The I2C adapter backing the SMU I2C engine.
>+ * @address: The I2C address of the slave device.
>+ * @data: Buffer that receives the data read from the bus.
>+ * @numbytes: The amount of data to receive.
>+ * @i2c_flag: Flags for the transaction
>+ *
>+ * Returns I2C_OK on success, or a bitmask of I2C_* error codes otherwise.
>+ */
>+static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
>+				      uint8_t address, uint8_t *data,
>+				      uint32_t numbytes, uint8_t i2c_flag)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	uint32_t bytes_received, ret = I2C_OK;
>+
>+	bytes_received = 0;
>+
>+	/* Set the I2C slave address */
>+	smu_v11_0_i2c_set_address(control, address);
>+
>+	/* Enable I2C */
>+	smu_v11_0_i2c_enable(control, true);
>+
>+	while (numbytes > 0) {
>+		uint32_t reg = 0;
>+
>+		smu_v11_0_i2c_clear_status(control);
>+
>+
>+		/* Prepare transaction */
>+
>+		/* Each time we disable I2C, so this is not a restart */
>+		if (bytes_received == 0)
>+			reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART,
>+					    (i2c_flag & I2C_RESTART) ? 1 : 0);
>+
>+		reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0);
>+		/* Read */
>+		reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1);
>+
>+		/* Requesting the last byte */
>+		if (numbytes == 1)
>+			/* Final transaction, so send stop if requested */
>+			reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP,
>+					    (i2c_flag & I2C_NO_STOP) ? 0 : 1);
>+
>+		WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
>+
>+		ret = smu_v11_0_i2c_poll_rx_status(control);
>+
>+		/* Any error, no point in proceeding */
>+		if (ret != I2C_OK) {
>+			if (ret & I2C_SW_TIMEOUT)
>+				DRM_ERROR("TIMEOUT ERROR !!!");
>+
>+			if (ret & I2C_NAK_7B_ADDR_NOACK)
>+				DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!");
>+
>+			if (ret & I2C_NAK_TXDATA_NOACK)
>+				DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!");
>+
>+			break;
>+		}
>+
>+		reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD);
>+		data[bytes_received] = REG_GET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT);
>+
>+		/* Record that the bytes were received */
>+		bytes_received++;
>+		numbytes--;
>+	}
>+
>+	DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :",
>+			 (uint16_t)address, bytes_received);
>+
>+	if (drm_debug & DRM_UT_DRIVER) {
>+		print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
>+			       16, 1, data, bytes_received, false);
>+	}
>+
>+	return ret;
>+}
>+
>+static void smu_v11_0_i2c_abort(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	uint32_t reg = 0;
>+
>+	/* Enable I2C engine; */
>+	reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ENABLE, 1);
>+	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg);
>+
>+	/* Abort previous transaction */
>+	reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ABORT, 1);
>+	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg);
>+
>+	DRM_DEBUG_DRIVER("I2C_Abort() Done.");
>+}
>+
>+
>+static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+
>+	const uint32_t IDLE_TIMEOUT = 1024;
>+	uint32_t timeout_count = 0;
>+	uint32_t reg_ic_enable, reg_ic_enable_status, reg_ic_clr_activity;
>+
>+	reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
>+	reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
>+
>+
>+	if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) &&
>+	    (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) {
>+		/*
>+		 * Nobody is using the I2C engine, but it remains active because
>+		 * someone failed to send a STOP
>+		 */
>+		smu_v11_0_i2c_abort(control);
>+	} else if (REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) {
>+		/* Nobody is using I2C engine */
>+		return true;
>+	}
>+
>+	/* Keep reading activity bit until it's cleared */
>+	do {
>+		reg_ic_clr_activity = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_ACTIVITY);
>+
>+		if (REG_GET_FIELD(reg_ic_clr_activity,
>+		    CKSVII2C_IC_CLR_ACTIVITY, CLR_ACTIVITY) == 0)
>+			return true;
>+
>+		++timeout_count;
>+
>+	} while (timeout_count < IDLE_TIMEOUT);
>+
>+	return false;
>+}
>+
>+static void smu_v11_0_i2c_init(struct i2c_adapter *control)
>+{
>+	/* Disable clock gating */
>+	smu_v11_0_i2c_set_clock_gating(control, false);
>+
>+	if (!smu_v11_0_i2c_activity_done(control))
>+		DRM_WARN("I2C busy !");
>+
>+	/* Disable I2C */
>+	smu_v11_0_i2c_enable(control, false);
>+
>+	/* Configure I2C to operate as master and in standard mode */
>+	smu_v11_0_i2c_configure(control);
>+
>+	/* Initialize the clock to standard mode (100 kHz) */
>+	smu_v11_0_i2c_set_clock(control);
>+
>+}
>+
>+static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	uint32_t reg_ic_enable_status, reg_ic_enable;
>+
>+	smu_v11_0_i2c_enable(control, false);
>+
>+	/* Double check if disabled, else force abort */
>+	reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
>+	reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
>+
>+	if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) &&
>+	    (REG_GET_FIELD(reg_ic_enable_status,
>+			   CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) {
>+		/*
>+		 * Nobody is using the I2C engine, but it remains active because
>+		 * someone failed to send a STOP
>+		 */
>+		smu_v11_0_i2c_abort(control);
>+	}
>+
>+	/* Restore clock gating */
>+	smu_v11_0_i2c_set_clock_gating(control, true);
>+
>+}
>+
>+static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+
>+	/* Send PPSMC_MSG_RequestI2CBus */
>+	if (!adev->powerplay.pp_funcs->smu_i2c_bus_access)
>+		goto Fail;
>+
>+
>+	if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, true))
>+		return true;
>+
>+Fail:
>+	return false;
>+}
>+
>+static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+
>+	if (!adev->powerplay.pp_funcs->smu_i2c_bus_access)
>+		goto Fail;
>+
>+	/* Send PPSMC_MSG_ReleaseI2CBus */
>+	if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle,
>+							  false))
>+		return true;
>+
>+Fail:
>+	return false;
>+}
>+
>+/***************************** EEPROM I2C GLUE ****************************/
>+
>+static uint32_t smu_v11_0_i2c_eeprom_read_data(struct i2c_adapter *control,
>+					       uint8_t address,
>+					       uint8_t *data,
>+					       uint32_t numbytes)
>+{
>+	uint32_t ret = 0;
>+
>+	/* First 2 bytes are dummy write to set EEPROM address */
>+	ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP);
>+	if (ret != I2C_OK)
>+		goto Fail;
>+
>+	/* Now read data starting with that address */
>+	ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2,
>+				    I2C_RESTART);
>+
>+Fail:
>+	if (ret != I2C_OK)
>+		DRM_ERROR("ReadData() - I2C error occurred :%x", ret);
>+
>+	return ret;
>+}
>+
>+static uint32_t smu_v11_0_i2c_eeprom_write_data(struct i2c_adapter *control,
>+						uint8_t address,
>+						uint8_t *data,
>+						uint32_t numbytes)
>+{
>+	uint32_t ret;
>+
>+	ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0);
>+
>+	if (ret != I2C_OK)
>+		DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret);
>+	else
>+		/*
>+		 * According to the EEPROM spec, at most 10 ms are required for the
>+		 * EEPROM to flush its internal RX buffer after a STOP is issued at
>+		 * the end of a write transaction. During this time the EEPROM will
>+		 * not be responsive to any more commands - so wait a bit more.
>+		 *
>+		 * TODO Improve to wait for first ACK for slave address after
>+		 * internal write cycle done.
>+		 */
>+		msleep(10);
>+
>+	return ret;
>+
>+}
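The dummy-write-then-read in smu_v11_0_i2c_eeprom_read_data() above is the usual random-read protocol of an EEPROM with 2-byte internal addressing: the first two buffer bytes carry the big-endian word address, the remaining bytes receive the payload. A minimal caller-side sketch, not part of the patch (offset is hypothetical; 0xA0 is the device address also used by the self-test at the end of this file):

	uint8_t buf[2 + 4];			/* 2 address bytes + 4 payload bytes */
	buf[0] = (offset >> 8) & 0xff;		/* EEPROM word address, high byte */
	buf[1] = offset & 0xff;			/* EEPROM word address, low byte */
	if (smu_v11_0_i2c_eeprom_read_data(control, 0xA0, buf, sizeof(buf)) == I2C_OK) {
		/* buf[2]..buf[5] now hold the 4 bytes stored at offset */
	}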
>+
>+static void lock_bus(struct i2c_adapter *i2c, unsigned int flags)
>+{
>+	struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c);
>+
>+	if (!smu_v11_0_i2c_bus_lock(i2c)) {
>+		DRM_ERROR("Failed to lock the bus from SMU");
>+		return;
>+	}
>+
>+	control->bus_locked = true;
>+}
>+
>+static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags)
>+{
>+	WARN_ONCE(1, "This operation is not supposed to run in atomic context!");
>+	return false;
>+}
>+
>+static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags)
>+{
>+	struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c);
>+
>+	if (!smu_v11_0_i2c_bus_unlock(i2c)) {
>+		DRM_ERROR("Failed to unlock the bus from SMU");
>+		return;
>+	}
>+
>+	control->bus_locked = false;
>+}
>+
>+static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = {
>+	.lock_bus = lock_bus,
>+	.trylock_bus = trylock_bus,
>+	.unlock_bus = unlock_bus,
>+};
>+
>+static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap,
>+			      struct i2c_msg *msgs, int num)
>+{
>+	int i, ret;
>+	struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c_adap);
>+
>+	if (!control->bus_locked) {
>+		DRM_ERROR("I2C bus unlocked, stopping transaction!");
>+		return -EIO;
>+	}
>+
>+	smu_v11_0_i2c_init(i2c_adap);
>+
>+	for (i = 0; i < num; i++) {
>+		if (msgs[i].flags & I2C_M_RD)
>+			ret = smu_v11_0_i2c_eeprom_read_data(i2c_adap,
>+							(uint8_t)msgs[i].addr,
>+							msgs[i].buf, msgs[i].len);
>+		else
>+			ret = smu_v11_0_i2c_eeprom_write_data(i2c_adap,
>+							 (uint8_t)msgs[i].addr,
>+							 msgs[i].buf, msgs[i].len);
>+
>+		if (ret != I2C_OK) {
>+			num = -EIO;
>+			break;
>+		}
>+	}
>+
>+	smu_v11_0_i2c_fini(i2c_adap);
>+	return num;
>+}
>+
>+static u32 smu_v11_0_i2c_eeprom_i2c_func(struct i2c_adapter *adap)
>+{
>+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
>+}
>+
>+
>+static const struct i2c_algorithm smu_v11_0_i2c_eeprom_i2c_algo = {
>+	.master_xfer = smu_v11_0_i2c_eeprom_i2c_xfer,
>+	.functionality = smu_v11_0_i2c_eeprom_i2c_func,
>+};
>+
>+int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control)
>+{
>+	struct amdgpu_device *adev = to_amdgpu_device(control);
>+	int res;
>+
>+	control->owner = THIS_MODULE;
>+	control->class = I2C_CLASS_SPD;
>+	control->dev.parent = &adev->pdev->dev;
>+	control->algo = &smu_v11_0_i2c_eeprom_i2c_algo;
>+	snprintf(control->name, sizeof(control->name), "RAS EEPROM");
>+	control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops;
>+
>+	res = i2c_add_adapter(control);
>+	if (res)
>+		DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
>+
>+	return res;
>+}
>+
>+void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control)
>+{
>+	i2c_del_adapter(control);
>+}
>+
>+/*
>+ * Keep this for future unit test if bugs arise
>+ */
>+#if 0
>+#define I2C_TARGET_ADDR 0xA0
>+
>+bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control)
>+{
>+
>+	uint32_t ret = I2C_OK;
>+	uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef};
>+
>+
>+	DRM_INFO("Begin");
>+
>+	if (!smu_v11_0_i2c_bus_lock(control)) {
>+		DRM_ERROR("Failed to lock the bus!");
>+		return false;
>+	}
>+
>+	smu_v11_0_i2c_init(control);
>+
>+	/* Write 0xde to address 0x0000 on the EEPROM */
>+	ret = smu_v11_0_i2c_eeprom_write_data(control, I2C_TARGET_ADDR, data, 6);
>+
>+	ret = smu_v11_0_i2c_eeprom_read_data(control, I2C_TARGET_ADDR, data, 6);
>+
>+	smu_v11_0_i2c_fini(control);
>+
>+	smu_v11_0_i2c_bus_unlock(control);
>+
>+
>+	DRM_INFO("End");
>+	return true;
>+}
>+#endif
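The pieces above plug into the generic i2c core: i2c_transfer() invokes lock_bus() (which arbitrates the engine away from the SMU), then the master_xfer() callback, then unlock_bus(). A minimal sketch of a read through the registered adapter, not part of the patch (adap and buf are hypothetical; the 8-bit 0xA0 address and the 2-byte address prefix follow the EEPROM glue above):

	uint8_t buf[2 + 4] = { 0x00, 0x00 };	/* word address 0x0000, then 4 payload bytes */
	struct i2c_msg msg = {
		.addr	= 0xA0,		/* 8-bit form, as this glue expects */
		.flags	= I2C_M_RD,
		.len	= sizeof(buf),
		.buf	= buf,
	};
	int res = i2c_transfer(adap, &msg, 1);	/* 1 on success, negative errno on failure */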
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h	1969-12-31 18:00:00.000000000 -0600
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h	2019-08-31 15:01:11.851736168 -0500
>@@ -0,0 +1,34 @@
>+/*
>+ * Copyright 2019 Advanced Micro Devices, Inc.
>+ *
>+ * Permission is hereby granted, free of charge, to any person obtaining a
>+ * copy of this software and associated documentation files (the "Software"),
>+ * to deal in the Software without restriction, including without limitation
>+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>+ * and/or sell copies of the Software, and to permit persons to whom the
>+ * Software is furnished to do so, subject to the following conditions:
>+ *
>+ * The above copyright notice and this permission notice shall be included in
>+ * all copies or substantial portions of the Software.
>+ *
>+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>+ * OTHER DEALINGS IN THE SOFTWARE.
>+ *
>+ */
>+
>+#ifndef SMU_V11_I2C_CONTROL_H
>+#define SMU_V11_I2C_CONTROL_H
>+
>+#include <linux/types.h>
>+
>+struct i2c_adapter;
>+
>+int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control);
>+void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control);
>+
>+#endif
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15.c	2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15.c	2019-08-31 15:08:53.346776943 -0500
>@@ -63,6 +63,8 @@
> #include "uvd_v7_0.h"
> #include "vce_v4_0.h"
> #include "vcn_v1_0.h"
>+#include "vcn_v2_0.h"
>+#include "vcn_v2_5.h"
> #include "dce_virtual.h"
> #include "mxgpu_ai.h"
> #include "amdgpu_smu.h"
>@@ -115,6 +117,49 @@
> 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
> }
> 
>+static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
>+{
>+	unsigned long flags, address, data;
>+	u64 r;
>+	address = adev->nbio_funcs->get_pcie_index_offset(adev);
>+	data = adev->nbio_funcs->get_pcie_data_offset(adev);
>+
>+	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>+	/* read low 32 bit */
>+	WREG32(address, reg);
>+	(void)RREG32(address);
>+	r = RREG32(data);
>+
>+	/* read high 32 bit */
>+	WREG32(address, reg + 4);
>+	(void)RREG32(address);
>+	r |= ((u64)RREG32(data) << 32);
>+	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
>+	return r;
>+}
>+
>+static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
>+{
>+	unsigned long flags, address, data;
>+
>+	address = adev->nbio_funcs->get_pcie_index_offset(adev);
>+	data = adev->nbio_funcs->get_pcie_data_offset(adev);
>+
>+	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>+	/* write low 32 bit */
>+	WREG32(address, reg);
>+	(void)RREG32(address);
>+	WREG32(data, (u32)(v & 0xffffffffULL));
>+	(void)RREG32(data);
>+
>+	/* write high 32 bit */
>+	WREG32(address, reg + 4);
>+	(void)RREG32(address);
>+	WREG32(data, (u32)(v >> 32));
>+	(void)RREG32(data);
>+	
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); >+} >+ > static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) > { > unsigned long flags, address, data; >@@ -464,12 +509,23 @@ > return 0; > } > >-static int soc15_asic_reset(struct amdgpu_device *adev) >+static int soc15_mode2_reset(struct amdgpu_device *adev) >+{ >+ if (!adev->powerplay.pp_funcs || >+ !adev->powerplay.pp_funcs->asic_reset_mode_2) >+ return -ENOENT; >+ >+ return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle); >+} >+ >+static enum amd_reset_method >+soc15_asic_reset_method(struct amdgpu_device *adev) > { >- int ret; > bool baco_reset; > > switch (adev->asic_type) { >+ case CHIP_RAVEN: >+ return AMD_RESET_METHOD_MODE2; > case CHIP_VEGA10: > case CHIP_VEGA12: > soc15_asic_get_baco_capability(adev, &baco_reset); >@@ -493,11 +549,23 @@ > } > > if (baco_reset) >- ret = soc15_asic_baco_reset(adev); >+ return AMD_RESET_METHOD_BACO; > else >- ret = soc15_asic_mode1_reset(adev); >+ return AMD_RESET_METHOD_MODE1; >+} > >- return ret; >+static int soc15_asic_reset(struct amdgpu_device *adev) >+{ >+ switch (soc15_asic_reset_method(adev)) { >+ case AMD_RESET_METHOD_BACO: >+ amdgpu_inc_vram_lost(adev); >+ return soc15_asic_baco_reset(adev); >+ case AMD_RESET_METHOD_MODE2: >+ return soc15_mode2_reset(adev); >+ default: >+ amdgpu_inc_vram_lost(adev); >+ return soc15_asic_mode1_reset(adev); >+ } > } > > /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, >@@ -581,26 +649,31 @@ > case CHIP_VEGA10: > case CHIP_VEGA12: > case CHIP_RAVEN: >+ case CHIP_RENOIR: > vega10_reg_base_init(adev); > break; > case CHIP_VEGA20: > vega20_reg_base_init(adev); > break; >+ case CHIP_ARCTURUS: >+ arct_reg_base_init(adev); >+ break; > default: > return -EINVAL; > } > >- if (adev->asic_type == CHIP_VEGA20) >+ if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) > adev->gmc.xgmi.supported = true; > > if (adev->flags & AMD_IS_APU) > adev->nbio_funcs = &nbio_v7_0_funcs; >- else if (adev->asic_type == CHIP_VEGA20) >+ else if (adev->asic_type == CHIP_VEGA20 || >+ adev->asic_type == CHIP_ARCTURUS) > adev->nbio_funcs = &nbio_v7_4_funcs; > else > adev->nbio_funcs = &nbio_v6_1_funcs; > >- if (adev->asic_type == CHIP_VEGA20) >+ if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) > adev->df_funcs = &df_v3_6_funcs; > else > adev->df_funcs = &df_v1_7_funcs; >@@ -672,6 +745,37 @@ > #endif > amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); > break; >+ case CHIP_ARCTURUS: >+ amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); >+ amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); >+ if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) >+ amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); >+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); >+ break; >+ case CHIP_RENOIR: >+ amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); >+ amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); >+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) >+ amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); >+ if (is_support_sw_smu(adev)) >+ amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); >+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); >+ 
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); >+ if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) >+ amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); >+#if defined(CONFIG_DRM_AMD_DC) >+ else if (amdgpu_device_has_dc_support(adev)) >+ amdgpu_device_ip_block_add(adev, &dm_ip_block); >+#else >+# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." >+#endif >+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); >+ break; > default: > return -EINVAL; > } >@@ -688,7 +792,7 @@ > struct amdgpu_ring *ring) > { > if (!ring || !ring->funcs->emit_wreg) >- WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); >+ WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); > else > amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( > HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); >@@ -714,14 +818,9 @@ > > /* Set the 2 events that we wish to watch, defined above */ > /* Reg 40 is # received msgs */ >+ /* Reg 104 is # of posted requests sent */ > perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); >- /* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */ >- if (adev->asic_type == CHIP_VEGA20) >- perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, >- EVENT1_SEL, 108); >- else >- perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, >- EVENT1_SEL, 104); >+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > > /* Write to enable desired perf counters */ > WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); >@@ -751,6 +850,55 @@ > *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > } > >+static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, >+ uint64_t *count1) >+{ >+ uint32_t perfctr = 0; >+ uint64_t cnt0_of, cnt1_of; >+ int tmp; >+ >+ /* This reports 0 on APUs, so return to avoid writing/reading registers >+ * that may or may not be different from their GPU counterparts >+ */ >+ if (adev->flags & AMD_IS_APU) >+ return; >+ >+ /* Set the 2 events that we wish to watch, defined above */ >+ /* Reg 40 is # received msgs */ >+ /* Reg 108 is # of posted requests sent on VG20 */ >+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, >+ EVENT0_SEL, 40); >+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, >+ EVENT1_SEL, 108); >+ >+ /* Write to enable desired perf counters */ >+ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr); >+ /* Zero out and enable the perf counters >+ * Write 0x5: >+ * Bit 0 = Start all counters(1) >+ * Bit 2 = Global counter reset enable(1) >+ */ >+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); >+ >+ msleep(1000); >+ >+ /* Load the shadow and disable the perf counters >+ * Write 0x2: >+ * Bit 0 = Stop counters(0) >+ * Bit 1 = Load the shadow counters(1) >+ */ >+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); >+ >+ /* Read register values to get any >32bit overflow */ >+ tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3); >+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER); >+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER); >+ >+ /* Get the values and add the overflow */ >+ *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32); >+ *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32); >+} >+ > static bool soc15_need_reset_on_init(struct amdgpu_device *adev) > { > u32 sol_reg; >@@ -792,6 +940,7 @@ > .read_bios_from_rom = &soc15_read_bios_from_rom, > .read_register = &soc15_read_register, > .reset = &soc15_asic_reset, >+ .reset_method = &soc15_asic_reset_method, > .set_vga_state = 
&soc15_vga_set_state,
> 	.get_xclk = &soc15_get_xclk,
> 	.set_uvd_clocks = &soc15_set_uvd_clocks,
>@@ -821,9 +970,10 @@
> 	.invalidate_hdp = &soc15_invalidate_hdp,
> 	.need_full_reset = &soc15_need_full_reset,
> 	.init_doorbell_index = &vega20_doorbell_index_init,
>-	.get_pcie_usage = &soc15_get_pcie_usage,
>+	.get_pcie_usage = &vega20_get_pcie_usage,
> 	.need_reset_on_init = &soc15_need_reset_on_init,
> 	.get_pcie_replay_count = &soc15_get_pcie_replay_count,
>+	.reset_method = &soc15_asic_reset_method
> };
> 
> static int soc15_common_early_init(void *handle)
>@@ -837,6 +987,8 @@
> 	adev->smc_wreg = NULL;
> 	adev->pcie_rreg = &soc15_pcie_rreg;
> 	adev->pcie_wreg = &soc15_pcie_wreg;
>+	adev->pcie_rreg64 = &soc15_pcie_rreg64;
>+	adev->pcie_wreg64 = &soc15_pcie_wreg64;
> 	adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
> 	adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
> 	adev->didt_rreg = &soc15_didt_rreg;
>@@ -993,6 +1145,53 @@
> 		adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
> 		}
> 		break;
>+	case CHIP_ARCTURUS:
>+		adev->asic_funcs = &vega20_asic_funcs;
>+		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
>+			AMD_CG_SUPPORT_GFX_MGLS |
>+			AMD_CG_SUPPORT_GFX_CGCG |
>+			AMD_CG_SUPPORT_GFX_CGLS |
>+			AMD_CG_SUPPORT_GFX_CP_LS |
>+			AMD_CG_SUPPORT_HDP_MGCG |
>+			AMD_CG_SUPPORT_HDP_LS |
>+			AMD_CG_SUPPORT_SDMA_MGCG |
>+			AMD_CG_SUPPORT_SDMA_LS |
>+			AMD_CG_SUPPORT_MC_MGCG |
>+			AMD_CG_SUPPORT_MC_LS;
>+		adev->pg_flags = 0;
>+		adev->external_rev_id = adev->rev_id + 0x32;
>+		break;
>+	case CHIP_RENOIR:
>+		adev->asic_funcs = &soc15_asic_funcs;
>+		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
>+				 AMD_CG_SUPPORT_GFX_MGLS |
>+				 AMD_CG_SUPPORT_GFX_3D_CGCG |
>+				 AMD_CG_SUPPORT_GFX_3D_CGLS |
>+				 AMD_CG_SUPPORT_GFX_CGCG |
>+				 AMD_CG_SUPPORT_GFX_CGLS |
>+				 AMD_CG_SUPPORT_GFX_CP_LS |
>+				 AMD_CG_SUPPORT_MC_MGCG |
>+				 AMD_CG_SUPPORT_MC_LS |
>+				 AMD_CG_SUPPORT_SDMA_MGCG |
>+				 AMD_CG_SUPPORT_SDMA_LS |
>+				 AMD_CG_SUPPORT_BIF_LS |
>+				 AMD_CG_SUPPORT_HDP_LS |
>+				 AMD_CG_SUPPORT_ROM_MGCG |
>+				 AMD_CG_SUPPORT_VCN_MGCG |
>+				 AMD_CG_SUPPORT_IH_CG |
>+				 AMD_CG_SUPPORT_ATHUB_LS |
>+				 AMD_CG_SUPPORT_ATHUB_MGCG |
>+				 AMD_CG_SUPPORT_DF_MGCG;
>+		adev->pg_flags = AMD_PG_SUPPORT_SDMA |
>+				 AMD_PG_SUPPORT_VCN |
>+				 AMD_PG_SUPPORT_VCN_DPG;
>+		adev->external_rev_id = adev->rev_id + 0x91;
>+
>+		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
>+			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
>+				AMD_PG_SUPPORT_CP |
>+				AMD_PG_SUPPORT_RLC_SMU_HS;
>+		break;
> 	default:
> 		/* FIXME: not supported yet */
> 		return -EINVAL;
>@@ -1038,21 +1237,18 @@
> 	int i;
> 	struct amdgpu_ring *ring;
> 
>-	/* Two reasons to skip
>-	 * 1, Host driver already programmed them
>-	 * 2, To avoid registers program violations in SR-IOV
>-	 */
>-	if (!amdgpu_virt_support_skip_setting(adev)) {
>+	/* sdma/ih doorbell range are programmed by the hypervisor */
>+	if (!amdgpu_sriov_vf(adev)) {
> 		for (i = 0; i < adev->sdma.num_instances; i++) {
> 			ring = &adev->sdma.instance[i].ring;
> 			adev->nbio_funcs->sdma_doorbell_range(adev, i,
> 				ring->use_doorbell, ring->doorbell_index,
> 				adev->doorbell_index.sdma_doorbell_range);
> 		}
>-	}
> 
>-	adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
>+		adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
> 				adev->irq.ih.doorbell_index);
>+	}
> }
> 
> static int soc15_common_hw_init(void *handle)
>@@ -1129,7 +1325,8 @@
> {
> 	uint32_t def, data;
> 
>-	if (adev->asic_type == CHIP_VEGA20) {
>+	if (adev->asic_type == CHIP_VEGA20 ||
>+		adev->asic_type == CHIP_ARCTURUS) {
> 		def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL));
> 
> 		if (enable && (adev->cg_flags &
AMD_CG_SUPPORT_HDP_LS)) >@@ -1248,6 +1445,7 @@ > state == AMD_CG_STATE_GATE ? true : false); > break; > case CHIP_RAVEN: >+ case CHIP_RENOIR: > adev->nbio_funcs->update_medium_grain_clock_gating(adev, > state == AMD_CG_STATE_GATE ? true : false); > adev->nbio_funcs->update_medium_grain_light_sleep(adev, >@@ -1261,6 +1459,10 @@ > soc15_update_rom_medium_grain_clock_gating(adev, > state == AMD_CG_STATE_GATE ? true : false); > break; >+ case CHIP_ARCTURUS: >+ soc15_update_hdp_light_sleep(adev, >+ state == AMD_CG_STATE_GATE ? true : false); >+ break; > default: > break; > } >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15_common.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15_common.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15_common.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15_common.h 2019-08-31 15:01:11.851736168 -0500 >@@ -69,9 +69,10 @@ > } \ > } while (0) > >+#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a))) > #define WREG32_RLC(reg, value) \ > do { \ >- if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ >+ if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ > uint32_t i = 0; \ > uint32_t retries = 50000; \ > uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ >@@ -96,7 +97,7 @@ > #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ > do { \ > uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ >- if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ >+ if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ > uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ > uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ > uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \ >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15.h 2019-08-31 15:01:11.851736168 -0500 >@@ -77,6 +77,7 @@ > > int vega10_reg_base_init(struct amdgpu_device *adev); > int vega20_reg_base_init(struct amdgpu_device *adev); >+int arct_reg_base_init(struct amdgpu_device *adev); > > void vega10_doorbell_index_init(struct amdgpu_device *adev); > void vega20_doorbell_index_init(struct amdgpu_device *adev); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c 2019-08-31 15:01:11.851736168 -0500 >@@ -0,0 +1,255 @@ >+/* >+ * Copyright 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. 
>+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. >+ * >+ */ >+#include "umc_v6_1.h" >+#include "amdgpu_ras.h" >+#include "amdgpu.h" >+ >+#include "rsmu/rsmu_0_0_2_offset.h" >+#include "rsmu/rsmu_0_0_2_sh_mask.h" >+#include "umc/umc_6_1_1_offset.h" >+#include "umc/umc_6_1_1_sh_mask.h" >+ >+#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 >+ >+/* >+ * (addr / 256) * 8192, the higher 26 bits in ErrorAddr >+ * is the index of 8KB block >+ */ >+#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) >+/* channel index is the index of 256B block */ >+#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) >+/* offset in 256B block */ >+#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) >+ >+const uint32_t >+ umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { >+ {2, 18, 11, 27}, {4, 20, 13, 29}, >+ {1, 17, 8, 24}, {7, 23, 14, 30}, >+ {10, 26, 3, 19}, {12, 28, 5, 21}, >+ {9, 25, 0, 16}, {15, 31, 6, 22} >+}; >+ >+static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, >+ uint32_t umc_instance) >+{ >+ uint32_t rsmu_umc_index; >+ >+ rsmu_umc_index = RREG32_SOC15(RSMU, 0, >+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); >+ rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, >+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, >+ RSMU_UMC_INDEX_MODE_EN, 1); >+ rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, >+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, >+ RSMU_UMC_INDEX_INSTANCE, umc_instance); >+ rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, >+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, >+ RSMU_UMC_INDEX_WREN, 1 << umc_instance); >+ WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, >+ rsmu_umc_index); >+} >+ >+static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) >+{ >+ WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, >+ RSMU_UMC_INDEX_MODE_EN, 0); >+} >+ >+static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, >+ uint32_t umc_reg_offset, >+ unsigned long *error_count) >+{ >+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; >+ uint32_t ecc_err_cnt, ecc_err_cnt_addr; >+ uint64_t mc_umc_status; >+ uint32_t mc_umc_status_addr; >+ >+ ecc_err_cnt_sel_addr = >+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); >+ ecc_err_cnt_addr = >+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); >+ mc_umc_status_addr = >+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); >+ >+ /* select the lower chip and check the error count */ >+ ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); >+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, >+ EccErrCntCsSel, 0); >+ WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); >+ ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); >+ *error_count += >+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - >+ UMC_V6_1_CE_CNT_INIT); >+ /* clear the lower chip err count */ >+ WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); >+ >+ /* select the higher chip and check the err counter */ >+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, >+ 
EccErrCntCsSel, 1);
>+	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
>+	ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
>+	*error_count +=
>+		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
>+		 UMC_V6_1_CE_CNT_INIT);
>+	/* clear the higher chip err count */
>+	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
>+
>+	/* check for SRAM correctable error;
>+	   MCUMC_STATUS is a 64-bit register */
>+	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
>+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
>+	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
>+	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
>+		*error_count += 1;
>+}
>+
>+static void umc_v6_1_query_uncorrectable_error_count(struct amdgpu_device *adev,
>+						      uint32_t umc_reg_offset,
>+						      unsigned long *error_count)
>+{
>+	uint64_t mc_umc_status;
>+	uint32_t mc_umc_status_addr;
>+
>+	mc_umc_status_addr =
>+		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
>+
>+	/* check the MCUMC_STATUS */
>+	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
>+	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
>+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
>+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
>+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
>+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
>+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
>+		*error_count += 1;
>+}
>+
>+static void umc_v6_1_query_error_count(struct amdgpu_device *adev,
>+					   struct ras_err_data *err_data, uint32_t umc_reg_offset,
>+					   uint32_t channel_index)
>+{
>+	umc_v6_1_query_correctable_error_count(adev, umc_reg_offset,
>+						   &(err_data->ce_count));
>+	umc_v6_1_query_uncorrectable_error_count(adev, umc_reg_offset,
>+						   &(err_data->ue_count));
>+}
>+
>+static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
>+					   void *ras_error_status)
>+{
>+	amdgpu_umc_for_each_channel(umc_v6_1_query_error_count);
>+}
>+
>+static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
>+					 struct ras_err_data *err_data,
>+					 uint32_t umc_reg_offset, uint32_t channel_index)
>+{
>+	uint32_t lsb, mc_umc_status_addr;
>+	uint64_t mc_umc_status, err_addr;
>+
>+	mc_umc_status_addr =
>+		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
>+
>+	/* skip error address process if -ENOMEM */
>+	if (!err_data->err_addr) {
>+		/* clear umc status */
>+		WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
>+		return;
>+	}
>+
>+	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
>+
>+	/* calculate error address if ue/ce error is detected */
>+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
>+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
>+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
>+		err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4);
>+
>+		/* the lowest lsb bits should be ignored */
>+		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
>+		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
>+		err_addr &= ~((0x1ULL << lsb) - 1);
>+
>+		/* translate umc channel address to soc pa, 3 parts are included */
>+		err_data->err_addr[err_data->err_addr_cnt] =
>+			ADDR_OF_8KB_BLOCK(err_addr) |
>+			ADDR_OF_256B_BLOCK(channel_index) |
>+			OFFSET_IN_256B_BLOCK(err_addr);
>+
>+		err_data->err_addr_cnt++;
>+	}
>+
>+	/* clear umc status */
>+	WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
>+}
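To make the three-part composition in umc_v6_1_query_error_address() concrete, a worked example with illustrative values (not real hardware data): for err_addr = 0x12345 and channel_index = 3,

	ADDR_OF_8KB_BLOCK(0x12345)    = (0x12345 & ~0xffULL) << 5 = 0x246000
	ADDR_OF_256B_BLOCK(3)         = 3 << 8                    = 0x300
	OFFSET_IN_256B_BLOCK(0x12345) = 0x12345 & 0xff            = 0x45

so the reported soc physical address is 0x246000 | 0x300 | 0x45 = 0x246345.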
>+
>+static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
>+					     void *ras_error_status)
>+{
>+	amdgpu_umc_for_each_channel(umc_v6_1_query_error_address);
>+}
>+
>+static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev,
>+					struct ras_err_data *err_data,
>+					uint32_t umc_reg_offset, uint32_t channel_index)
>+{
>+	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
>+	uint32_t ecc_err_cnt_addr;
>+
>+	ecc_err_cnt_sel_addr =
>+		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
>+	ecc_err_cnt_addr =
>+		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
>+
>+	/* select the lower chip and set its initial error count */
>+	ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
>+	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
>+					EccErrCntCsSel, 0);
>+	/* set ce error interrupt type to APIC based interrupt */
>+	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
>+					EccErrInt, 0x1);
>+	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
>+	/* set error count to initial value */
>+	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
>+
>+	/* select the higher chip and set its initial error count as well */
>+	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
>+					EccErrCntCsSel, 1);
>+	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
>+	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
>+}
>+
>+static void umc_v6_1_ras_init(struct amdgpu_device *adev)
>+{
>+	void *ras_error_status = NULL;
>+
>+	amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel);
>+}
>+
>+const struct amdgpu_umc_funcs umc_v6_1_funcs = {
>+	.ras_init = umc_v6_1_ras_init,
>+	.query_ras_error_count = umc_v6_1_query_ras_error_count,
>+	.query_ras_error_address = umc_v6_1_query_ras_error_address,
>+	.enable_umc_index_mode = umc_v6_1_enable_umc_index_mode,
>+	.disable_umc_index_mode = umc_v6_1_disable_umc_index_mode,
>+};
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h	1969-12-31 18:00:00.000000000 -0600
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h	2019-08-31 15:01:11.851736168 -0500
>@@ -0,0 +1,51 @@
>+/*
>+ * Copyright 2019 Advanced Micro Devices, Inc.
>+ *
>+ * Permission is hereby granted, free of charge, to any person obtaining a
>+ * copy of this software and associated documentation files (the "Software"),
>+ * to deal in the Software without restriction, including without limitation
>+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>+ * and/or sell copies of the Software, and to permit persons to whom the
>+ * Software is furnished to do so, subject to the following conditions:
>+ *
>+ * The above copyright notice and this permission notice shall be included in
>+ * all copies or substantial portions of the Software.
>+ *
>+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
>+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>+ * OTHER DEALINGS IN THE SOFTWARE.
>+ *
>+ */
>+#ifndef __UMC_V6_1_H__
>+#define __UMC_V6_1_H__
>+
>+#include "soc15_common.h"
>+#include "amdgpu.h"
>+
>+/* HBM Memory Channel Width */
>+#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH	128
>+/* number of umc channel instance with memory map register access */
>+#define UMC_V6_1_CHANNEL_INSTANCE_NUM		4
>+/* number of umc instance with memory map register access */
>+#define UMC_V6_1_UMC_INSTANCE_NUM		8
>+/* total channel instances in one umc block */
>+#define UMC_V6_1_TOTAL_CHANNEL_NUM	(UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
>+/* UMC register per channel offset */
>+#define UMC_V6_1_PER_CHANNEL_OFFSET		0x800
>+
>+/* EccErrCnt max value */
>+#define UMC_V6_1_CE_CNT_MAX		0xffff
>+/* umc ce interrupt threshold */
>+#define UMC_V6_1_CE_INT_THRESHOLD	0xffff
>+/* umc ce count initial value */
>+#define UMC_V6_1_CE_CNT_INIT	(UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
>+
>+extern const struct amdgpu_umc_funcs umc_v6_1_funcs;
>+extern const uint32_t
>+	umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
>+
>+#endif
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c	2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c	2019-08-31 15:01:11.851736168 -0500
>@@ -1763,7 +1763,7 @@
> 	.align_mask = 0xf,
> 	.support_64bit_ptrs = false,
> 	.no_user_fence = true,
>-	.vmhub = AMDGPU_MMHUB,
>+	.vmhub = AMDGPU_MMHUB_0,
> 	.get_rptr = uvd_v7_0_ring_get_rptr,
> 	.get_wptr = uvd_v7_0_ring_get_wptr,
> 	.set_wptr = uvd_v7_0_ring_set_wptr,
>@@ -1796,7 +1796,7 @@
> 	.nop = HEVC_ENC_CMD_NO_OP,
> 	.support_64bit_ptrs = false,
> 	.no_user_fence = true,
>-	.vmhub = AMDGPU_MMHUB,
>+	.vmhub = AMDGPU_MMHUB_0,
> 	.get_rptr = uvd_v7_0_enc_ring_get_rptr,
> 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
> 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c	2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c	2019-08-31 15:01:11.851736168 -0500
>@@ -1070,7 +1070,7 @@
> 	.nop = VCE_CMD_NO_OP,
> 	.support_64bit_ptrs = false,
> 	.no_user_fence = true,
>-	.vmhub = AMDGPU_MMHUB,
>+	.vmhub = AMDGPU_MMHUB_0,
> 	.get_rptr = vce_v4_0_ring_get_rptr,
> 	.get_wptr = vce_v4_0_ring_get_wptr,
> 	.set_wptr = vce_v4_0_ring_set_wptr,
>diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c	2019-08-25 14:01:23.000000000 -0500
>+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c	2019-08-31 15:01:11.851736168 -0500
>@@ -63,6 +63,7 @@
> {
> 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
>+	adev->vcn.num_vcn_inst = 1;
> 	adev->vcn.num_enc_rings = 2;
> 
> 	vcn_v1_0_set_dec_ring_funcs(adev);
>@@ -87,20 +88,21 @@
> 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> 	/* VCN DEC TRAP */
>-	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT,
&adev->vcn.irq); >+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, >+ VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq); > if (r) > return r; > > /* VCN ENC TRAP */ > for (i = 0; i < adev->vcn.num_enc_rings; ++i) { > r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE, >- &adev->vcn.irq); >+ &adev->vcn.inst->irq); > if (r) > return r; > } > > /* VCN JPEG TRAP */ >- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); >+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq); > if (r) > return r; > >@@ -122,39 +124,39 @@ > if (r) > return r; > >- ring = &adev->vcn.ring_dec; >+ ring = &adev->vcn.inst->ring_dec; > sprintf(ring->name, "vcn_dec"); >- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); > if (r) > return r; > >- adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 = >+ adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 = > SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); >- adev->vcn.internal.data0 = adev->vcn.external.data0 = >+ adev->vcn.internal.data0 = adev->vcn.inst->external.data0 = > SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); >- adev->vcn.internal.data1 = adev->vcn.external.data1 = >+ adev->vcn.internal.data1 = adev->vcn.inst->external.data1 = > SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); >- adev->vcn.internal.cmd = adev->vcn.external.cmd = >+ adev->vcn.internal.cmd = adev->vcn.inst->external.cmd = > SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); >- adev->vcn.internal.nop = adev->vcn.external.nop = >+ adev->vcn.internal.nop = adev->vcn.inst->external.nop = > SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); > > for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >- ring = &adev->vcn.ring_enc[i]; >+ ring = &adev->vcn.inst->ring_enc[i]; > sprintf(ring->name, "vcn_enc%d", i); >- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); > if (r) > return r; > } > >- ring = &adev->vcn.ring_jpeg; >+ ring = &adev->vcn.inst->ring_jpeg; > sprintf(ring->name, "vcn_jpeg"); >- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); > if (r) > return r; > > adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; >- adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch = >+ adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch = > SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); > > return 0; >@@ -191,7 +193,7 @@ > static int vcn_v1_0_hw_init(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; >- struct amdgpu_ring *ring = &adev->vcn.ring_dec; >+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; > int i, r; > > r = amdgpu_ring_test_helper(ring); >@@ -199,14 +201,14 @@ > goto done; > > for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >- ring = &adev->vcn.ring_enc[i]; >+ ring = &adev->vcn.inst->ring_enc[i]; > ring->sched.ready = true; > r = amdgpu_ring_test_helper(ring); > if (r) > goto done; > } > >- ring = &adev->vcn.ring_jpeg; >+ ring = &adev->vcn.inst->ring_jpeg; > r = amdgpu_ring_test_helper(ring); > if (r) > goto done; >@@ -229,7 +231,7 @@ > static int vcn_v1_0_hw_fini(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; >- struct amdgpu_ring *ring = &adev->vcn.ring_dec; >+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; > > if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || > 
RREG32_SOC15(VCN, 0, mmUVD_STATUS)) >@@ -304,9 +306,9 @@ > offset = 0; > } else { > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr)); >+ lower_32_bits(adev->vcn.inst->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr)); >+ upper_32_bits(adev->vcn.inst->gpu_addr)); > offset = size; > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, > AMDGPU_UVD_FIRMWARE_OFFSET >> 3); >@@ -316,17 +318,17 @@ > > /* cache window 1: stack */ > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr + offset)); >+ lower_32_bits(adev->vcn.inst->gpu_addr + offset)); > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr + offset)); >+ upper_32_bits(adev->vcn.inst->gpu_addr + offset)); > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); > > /* cache window 2: context */ > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); >+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); >+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); > >@@ -374,9 +376,9 @@ > offset = 0; > } else { > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); >+ lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); >+ upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); > offset = size; > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, > AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); >@@ -386,9 +388,9 @@ > > /* cache window 1: stack */ > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); >+ lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); >+ upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, > 0xFFFFFFFF, 0); > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, >@@ -396,10 +398,10 @@ > > /* cache window 2: context */ > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), >+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), > 0xFFFFFFFF, 0); > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), >+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), > 0xFFFFFFFF, 0); > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); > WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, >@@ -779,7 +781,7 @@ > */ > static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) > { >- struct amdgpu_ring *ring = &adev->vcn.ring_dec; >+ struct 
amdgpu_ring *ring = &adev->vcn.inst->ring_dec; > uint32_t rb_bufsz, tmp; > uint32_t lmi_swap_cntl; > int i, j, r; >@@ -932,21 +934,21 @@ > WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, > ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); > >- ring = &adev->vcn.ring_enc[0]; >+ ring = &adev->vcn.inst->ring_enc[0]; > WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); > >- ring = &adev->vcn.ring_enc[1]; >+ ring = &adev->vcn.inst->ring_enc[1]; > WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); > >- ring = &adev->vcn.ring_jpeg; >+ ring = &adev->vcn.inst->ring_jpeg; > WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); > WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | > UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); >@@ -968,7 +970,7 @@ > > static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) > { >- struct amdgpu_ring *ring = &adev->vcn.ring_dec; >+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; > uint32_t rb_bufsz, tmp; > uint32_t lmi_swap_cntl; > >@@ -1106,7 +1108,7 @@ > ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); > > /* initialize JPEG wptr */ >- ring = &adev->vcn.ring_jpeg; >+ ring = &adev->vcn.inst->ring_jpeg; > ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); > > /* copy patch commands to the jpeg ring */ >@@ -1255,21 +1257,21 @@ > UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); > > /* Restore */ >- ring = &adev->vcn.ring_enc[0]; >+ ring = &adev->vcn.inst->ring_enc[0]; > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); > WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); > >- ring = &adev->vcn.ring_enc[1]; >+ ring = &adev->vcn.inst->ring_enc[1]; > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); > WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); > >- ring = &adev->vcn.ring_dec; >+ ring = &adev->vcn.inst->ring_dec; > WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, > RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); > SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, >@@ -1315,7 +1317,7 @@ > UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); > > /* Restore */ >- ring = &adev->vcn.ring_jpeg; >+ ring = &adev->vcn.inst->ring_jpeg; > WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); > WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, > UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | >@@ -1329,7 +1331,7 @@ > WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, > UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); > >- ring = &adev->vcn.ring_dec; >+ ring = &adev->vcn.inst->ring_dec; > WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, > RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); > SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, >@@ -1596,7 +1598,7 @@ 
> { > struct amdgpu_device *adev = ring->adev; > >- if (ring == &adev->vcn.ring_enc[0]) >+ if (ring == &adev->vcn.inst->ring_enc[0]) > return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); > else > return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); >@@ -1613,7 +1615,7 @@ > { > struct amdgpu_device *adev = ring->adev; > >- if (ring == &adev->vcn.ring_enc[0]) >+ if (ring == &adev->vcn.inst->ring_enc[0]) > return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); > else > return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); >@@ -1630,7 +1632,7 @@ > { > struct amdgpu_device *adev = ring->adev; > >- if (ring == &adev->vcn.ring_enc[0]) >+ if (ring == &adev->vcn.inst->ring_enc[0]) > WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, > lower_32_bits(ring->wptr)); > else >@@ -2114,16 +2116,16 @@ > > switch (entry->src_id) { > case 124: >- amdgpu_fence_process(&adev->vcn.ring_dec); >+ amdgpu_fence_process(&adev->vcn.inst->ring_dec); > break; > case 119: >- amdgpu_fence_process(&adev->vcn.ring_enc[0]); >+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); > break; > case 120: >- amdgpu_fence_process(&adev->vcn.ring_enc[1]); >+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); > break; > case 126: >- amdgpu_fence_process(&adev->vcn.ring_jpeg); >+ amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); > break; > default: > DRM_ERROR("Unhandled interrupt: %d %d\n", >@@ -2198,7 +2200,7 @@ > .align_mask = 0xf, > .support_64bit_ptrs = false, > .no_user_fence = true, >- .vmhub = AMDGPU_MMHUB, >+ .vmhub = AMDGPU_MMHUB_0, > .get_rptr = vcn_v1_0_dec_ring_get_rptr, > .get_wptr = vcn_v1_0_dec_ring_get_wptr, > .set_wptr = vcn_v1_0_dec_ring_set_wptr, >@@ -2232,7 +2234,7 @@ > .nop = VCN_ENC_CMD_NO_OP, > .support_64bit_ptrs = false, > .no_user_fence = true, >- .vmhub = AMDGPU_MMHUB, >+ .vmhub = AMDGPU_MMHUB_0, > .get_rptr = vcn_v1_0_enc_ring_get_rptr, > .get_wptr = vcn_v1_0_enc_ring_get_wptr, > .set_wptr = vcn_v1_0_enc_ring_set_wptr, >@@ -2264,7 +2266,7 @@ > .nop = PACKET0(0x81ff, 0), > .support_64bit_ptrs = false, > .no_user_fence = true, >- .vmhub = AMDGPU_MMHUB, >+ .vmhub = AMDGPU_MMHUB_0, > .extra_dw = 64, > .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, > .get_wptr = vcn_v1_0_jpeg_ring_get_wptr, >@@ -2295,7 +2297,7 @@ > > static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) > { >- adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; >+ adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; > DRM_INFO("VCN decode is enabled in VM mode\n"); > } > >@@ -2304,14 +2306,14 @@ > int i; > > for (i = 0; i < adev->vcn.num_enc_rings; ++i) >- adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; >+ adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; > > DRM_INFO("VCN encode is enabled in VM mode\n"); > } > > static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) > { >- adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; >+ adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; > DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); > } > >@@ -2322,8 +2324,8 @@ > > static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) > { >- adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; >- adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; >+ adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; >+ adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs; > } > > const struct amdgpu_ip_block_version vcn_v1_0_ip_block = >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 2019-08-25 
14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 2019-08-31 15:08:26.826774600 -0500 >@@ -22,7 +22,7 @@ > */ > > #include <linux/firmware.h> >-#include <drm/drmP.h> >+ > #include "amdgpu.h" > #include "amdgpu_vcn.h" > #include "soc15.h" >@@ -92,6 +92,7 @@ > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >+ adev->vcn.num_vcn_inst = 1; > adev->vcn.num_enc_rings = 2; > > vcn_v2_0_set_dec_ring_funcs(adev); >@@ -118,7 +119,7 @@ > /* VCN DEC TRAP */ > r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, > VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, >- &adev->vcn.irq); >+ &adev->vcn.inst->irq); > if (r) > return r; > >@@ -126,15 +127,14 @@ > for (i = 0; i < adev->vcn.num_enc_rings; ++i) { > r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, > i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, >- &adev->vcn.irq); >+ &adev->vcn.inst->irq); > if (r) > return r; > } > > /* VCN JPEG TRAP */ > r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, >- VCN_2_0__SRCID__JPEG_DECODE, >- &adev->vcn.irq); >+ VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq); > if (r) > return r; > >@@ -156,49 +156,56 @@ > if (r) > return r; > >- ring = &adev->vcn.ring_dec; >+ ring = &adev->vcn.inst->ring_dec; > > ring->use_doorbell = true; > ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; > > sprintf(ring->name, "vcn_dec"); >- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); > if (r) > return r; > >+ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; >+ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; >+ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; >+ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; >+ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; >+ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; >+ > adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; >- adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); >+ adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); > adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; >- adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); >+ adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); > adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; >- adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); >+ adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); > adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; >- adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); >+ adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); > adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; >- adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); >+ adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); > > for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >- ring = &adev->vcn.ring_enc[i]; >+ ring = &adev->vcn.inst->ring_enc[i]; > ring->use_doorbell = true; > ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; > sprintf(ring->name, "vcn_enc%d", i); >- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); > if (r) > return r; > } > >- ring = &adev->vcn.ring_jpeg; >+ ring 
= &adev->vcn.inst->ring_jpeg; > ring->use_doorbell = true; > ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; > sprintf(ring->name, "vcn_jpeg"); >- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); > if (r) > return r; > > adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode; > > adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; >- adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); >+ adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); > > return 0; > } >@@ -234,11 +241,11 @@ > static int vcn_v2_0_hw_init(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; >- struct amdgpu_ring *ring = &adev->vcn.ring_dec; >+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; > int i, r; > > adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, >- ring->doorbell_index); >+ ring->doorbell_index, 0); > > ring->sched.ready = true; > r = amdgpu_ring_test_ring(ring); >@@ -248,7 +255,7 @@ > } > > for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >- ring = &adev->vcn.ring_enc[i]; >+ ring = &adev->vcn.inst->ring_enc[i]; > ring->sched.ready = true; > r = amdgpu_ring_test_ring(ring); > if (r) { >@@ -257,7 +264,7 @@ > } > } > >- ring = &adev->vcn.ring_jpeg; >+ ring = &adev->vcn.inst->ring_jpeg; > ring->sched.ready = true; > r = amdgpu_ring_test_ring(ring); > if (r) { >@@ -283,7 +290,7 @@ > static int vcn_v2_0_hw_fini(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; >- struct amdgpu_ring *ring = &adev->vcn.ring_dec; >+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; > int i; > > if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || >@@ -294,11 +301,11 @@ > ring->sched.ready = false; > > for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >- ring = &adev->vcn.ring_enc[i]; >+ ring = &adev->vcn.inst->ring_enc[i]; > ring->sched.ready = false; > } > >- ring = &adev->vcn.ring_jpeg; >+ ring = &adev->vcn.inst->ring_jpeg; > ring->sched.ready = false; > > return 0; >@@ -368,9 +375,9 @@ > offset = 0; > } else { > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr)); >+ lower_32_bits(adev->vcn.inst->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr)); >+ upper_32_bits(adev->vcn.inst->gpu_addr)); > offset = size; > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, > AMDGPU_UVD_FIRMWARE_OFFSET >> 3); >@@ -380,17 +387,17 @@ > > /* cache window 1: stack */ > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr + offset)); >+ lower_32_bits(adev->vcn.inst->gpu_addr + offset)); > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr + offset)); >+ upper_32_bits(adev->vcn.inst->gpu_addr + offset)); > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); > > /* cache window 2: context */ > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, >- lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); >+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); > WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, >- upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); >+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); > WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); > 
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); > >@@ -426,10 +433,10 @@ > } else { > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), >- lower_32_bits(adev->vcn.gpu_addr), 0, indirect); >+ lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), >- upper_32_bits(adev->vcn.gpu_addr), 0, indirect); >+ upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); > offset = size; > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), >@@ -447,10 +454,10 @@ > if (!indirect) { > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), >- lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); >+ lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), >- upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); >+ upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); > } else { >@@ -467,10 +474,10 @@ > /* cache window 2: context */ > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), >- lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); >+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), >- upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); >+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( > UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); > WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( >@@ -658,7 +665,7 @@ > */ > static int jpeg_v2_0_start(struct amdgpu_device *adev) > { >- struct amdgpu_ring *ring = &adev->vcn.ring_jpeg; >+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg; > uint32_t tmp; > int r = 0; > >@@ -920,7 +927,7 @@ > > static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) > { >- struct amdgpu_ring *ring = &adev->vcn.ring_dec; >+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; > uint32_t rb_bufsz, tmp; > > vcn_v2_0_enable_static_power_gating(adev); >@@ -1046,7 +1053,7 @@ > > static int vcn_v2_0_start(struct amdgpu_device *adev) > { >- struct amdgpu_ring *ring = &adev->vcn.ring_dec; >+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; > uint32_t rb_bufsz, tmp; > uint32_t lmi_swap_cntl; > int i, j, r; >@@ -1197,14 +1204,14 @@ > WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, > lower_32_bits(ring->wptr)); > >- ring = &adev->vcn.ring_enc[0]; >+ ring = &adev->vcn.inst->ring_enc[0]; > WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); > >- ring = &adev->vcn.ring_enc[1]; >+ ring = &adev->vcn.inst->ring_enc[1]; > WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, 
mmUVD_RB_BASE_LO2, ring->gpu_addr); >@@ -1351,14 +1358,14 @@ > UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); > > /* Restore */ >- ring = &adev->vcn.ring_enc[0]; >+ ring = &adev->vcn.inst->ring_enc[0]; > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); > WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); > >- ring = &adev->vcn.ring_enc[1]; >+ ring = &adev->vcn.inst->ring_enc[1]; > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); > WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); > WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); >@@ -1480,11 +1487,13 @@ > * > * Write a start command to the ring. > */ >-static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) >+void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) > { >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); >+ struct amdgpu_device *adev = ring->adev; >+ >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); > amdgpu_ring_write(ring, 0); >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); > amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); > } > >@@ -1495,9 +1504,11 @@ > * > * Write a end command to the ring. > */ >-static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) >+void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) > { >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); >+ struct amdgpu_device *adev = ring->adev; >+ >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); > amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1)); > } > >@@ -1508,14 +1519,15 @@ > * > * Write a nop command to the ring. > */ >-static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) >+void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) > { >+ struct amdgpu_device *adev = ring->adev; > int i; > > WARN_ON(ring->wptr % 2 || count % 2); > > for (i = 0; i < count / 2; i++) { >- amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0)); > amdgpu_ring_write(ring, 0); > } > } >@@ -1528,30 +1540,31 @@ > * > * Write a fence and a trap command to the ring. 
> */ >-static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, >- unsigned flags) >+void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, >+ unsigned flags) > { >- WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); >+ struct amdgpu_device *adev = ring->adev; > >- amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0)); >+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0)); > amdgpu_ring_write(ring, seq); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); > amdgpu_ring_write(ring, addr & 0xffffffff); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); > amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); > amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1)); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); > amdgpu_ring_write(ring, 0); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); > amdgpu_ring_write(ring, 0); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); > > amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1)); > } >@@ -1564,44 +1577,46 @@ > * > * Write ring commands to execute the indirect buffer > */ >-static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, >- struct amdgpu_job *job, >- struct amdgpu_ib *ib, >- uint32_t flags) >+void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, >+ struct amdgpu_job *job, >+ struct amdgpu_ib *ib, >+ uint32_t flags) > { >+ struct amdgpu_device *adev = ring->adev; > unsigned vmid = AMDGPU_JOB_GET_VMID(job); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0)); > amdgpu_ring_write(ring, vmid); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0)); > amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); >- amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0)); > amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); >- amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0)); > amdgpu_ring_write(ring, ib->length_dw); > } > >-static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, >- uint32_t reg, uint32_t val, >- uint32_t mask) >+void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >+ uint32_t val, uint32_t mask) > { >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); >+ struct amdgpu_device *adev = ring->adev; >+ >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); > amdgpu_ring_write(ring, reg << 2); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, 
PACKET0(adev->vcn.internal.data1, 0)); > amdgpu_ring_write(ring, val); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0)); > amdgpu_ring_write(ring, mask); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); > > amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1)); > } > >-static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, >- unsigned vmid, uint64_t pd_addr) >+void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, >+ unsigned vmid, uint64_t pd_addr) > { > struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; > uint32_t data0, data1, mask; >@@ -1615,16 +1630,18 @@ > vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); > } > >-static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, >- uint32_t reg, uint32_t val) >+void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, >+ uint32_t reg, uint32_t val) > { >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); >+ struct amdgpu_device *adev = ring->adev; >+ >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); > amdgpu_ring_write(ring, reg << 2); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); > amdgpu_ring_write(ring, val); > >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); > > amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1)); > } >@@ -1640,7 +1657,7 @@ > { > struct amdgpu_device *adev = ring->adev; > >- if (ring == &adev->vcn.ring_enc[0]) >+ if (ring == &adev->vcn.inst->ring_enc[0]) > return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); > else > return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); >@@ -1657,7 +1674,7 @@ > { > struct amdgpu_device *adev = ring->adev; > >- if (ring == &adev->vcn.ring_enc[0]) { >+ if (ring == &adev->vcn.inst->ring_enc[0]) { > if (ring->use_doorbell) > return adev->wb.wb[ring->wptr_offs]; > else >@@ -1681,7 +1698,7 @@ > { > struct amdgpu_device *adev = ring->adev; > >- if (ring == &adev->vcn.ring_enc[0]) { >+ if (ring == &adev->vcn.inst->ring_enc[0]) { > if (ring->use_doorbell) { > adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); > WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); >@@ -1706,8 +1723,8 @@ > * > * Write enc a fence and a trap command to the ring. 
> */ >-static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, >- u64 seq, unsigned flags) >+void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, >+ u64 seq, unsigned flags) > { > WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); > >@@ -1718,7 +1735,7 @@ > amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP); > } > >-static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) >+void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) > { > amdgpu_ring_write(ring, VCN_ENC_CMD_END); > } >@@ -1731,10 +1748,10 @@ > * > * Write enc ring commands to execute the indirect buffer > */ >-static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, >- struct amdgpu_job *job, >- struct amdgpu_ib *ib, >- uint32_t flags) >+void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, >+ struct amdgpu_job *job, >+ struct amdgpu_ib *ib, >+ uint32_t flags) > { > unsigned vmid = AMDGPU_JOB_GET_VMID(job); > >@@ -1745,9 +1762,8 @@ > amdgpu_ring_write(ring, ib->length_dw); > } > >-static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, >- uint32_t reg, uint32_t val, >- uint32_t mask) >+void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >+ uint32_t val, uint32_t mask) > { > amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); > amdgpu_ring_write(ring, reg << 2); >@@ -1755,8 +1771,8 @@ > amdgpu_ring_write(ring, val); > } > >-static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, >- unsigned int vmid, uint64_t pd_addr) >+void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, >+ unsigned int vmid, uint64_t pd_addr) > { > struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; > >@@ -1767,8 +1783,7 @@ > lower_32_bits(pd_addr), 0xffffffff); > } > >-static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, >- uint32_t reg, uint32_t val) >+void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) > { > amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); > amdgpu_ring_write(ring, reg << 2); >@@ -1832,7 +1847,7 @@ > * > * Write a start command to the ring. > */ >-static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) >+void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) > { > amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, > 0, 0, PACKETJ_TYPE0)); >@@ -1850,7 +1865,7 @@ > * > * Write a end command to the ring. > */ >-static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) >+void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) > { > amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, > 0, 0, PACKETJ_TYPE0)); >@@ -1869,8 +1884,8 @@ > * > * Write a fence and a trap command to the ring. > */ >-static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, >- unsigned flags) >+void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, >+ unsigned flags) > { > WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); > >@@ -1918,10 +1933,10 @@ > * > * Write ring commands to execute the indirect buffer. 
> */ >-static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, >- struct amdgpu_job *job, >- struct amdgpu_ib *ib, >- uint32_t flags) >+void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, >+ struct amdgpu_job *job, >+ struct amdgpu_ib *ib, >+ uint32_t flags) > { > unsigned vmid = AMDGPU_JOB_GET_VMID(job); > >@@ -1969,9 +1984,8 @@ > amdgpu_ring_write(ring, 0x2); > } > >-static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, >- uint32_t reg, uint32_t val, >- uint32_t mask) >+void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >+ uint32_t val, uint32_t mask) > { > uint32_t reg_offset = (reg << 2); > >@@ -1997,8 +2011,8 @@ > amdgpu_ring_write(ring, mask); > } > >-static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, >- unsigned vmid, uint64_t pd_addr) >+void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, >+ unsigned vmid, uint64_t pd_addr) > { > struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; > uint32_t data0, data1, mask; >@@ -2012,8 +2026,7 @@ > vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); > } > >-static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, >- uint32_t reg, uint32_t val) >+void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) > { > uint32_t reg_offset = (reg << 2); > >@@ -2031,7 +2044,7 @@ > amdgpu_ring_write(ring, val); > } > >-static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) >+void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) > { > int i; > >@@ -2059,16 +2072,16 @@ > > switch (entry->src_id) { > case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: >- amdgpu_fence_process(&adev->vcn.ring_dec); >+ amdgpu_fence_process(&adev->vcn.inst->ring_dec); > break; > case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: >- amdgpu_fence_process(&adev->vcn.ring_enc[0]); >+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); > break; > case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: >- amdgpu_fence_process(&adev->vcn.ring_enc[1]); >+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); > break; > case VCN_2_0__SRCID__JPEG_DECODE: >- amdgpu_fence_process(&adev->vcn.ring_jpeg); >+ amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); > break; > default: > DRM_ERROR("Unhandled interrupt: %d %d\n", >@@ -2086,20 +2099,20 @@ > unsigned i; > int r; > >- WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); >+ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); > r = amdgpu_ring_alloc(ring, 4); > if (r) > return r; >- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); >+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); > amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); > amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); > amdgpu_ring_write(ring, 0xDEADBEEF); > amdgpu_ring_commit(ring); > for (i = 0; i < adev->usec_timeout; i++) { >- tmp = RREG32(adev->vcn.external.scratch9); >+ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); > if (tmp == 0xDEADBEEF) > break; >- DRM_UDELAY(1); >+ udelay(1); > } > > if (i >= adev->usec_timeout) >@@ -2158,7 +2171,7 @@ > static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { > .type = AMDGPU_RING_TYPE_VCN_DEC, > .align_mask = 0xf, >- .vmhub = AMDGPU_MMHUB, >+ .vmhub = AMDGPU_MMHUB_0, > .get_rptr = vcn_v2_0_dec_ring_get_rptr, > .get_wptr = vcn_v2_0_dec_ring_get_wptr, > .set_wptr = vcn_v2_0_dec_ring_set_wptr, >@@ -2189,7 +2202,7 @@ > .type = AMDGPU_RING_TYPE_VCN_ENC, > 
.align_mask = 0x3f, > .nop = VCN_ENC_CMD_NO_OP, >- .vmhub = AMDGPU_MMHUB, >+ .vmhub = AMDGPU_MMHUB_0, > .get_rptr = vcn_v2_0_enc_ring_get_rptr, > .get_wptr = vcn_v2_0_enc_ring_get_wptr, > .set_wptr = vcn_v2_0_enc_ring_set_wptr, >@@ -2218,7 +2231,7 @@ > static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = { > .type = AMDGPU_RING_TYPE_VCN_JPEG, > .align_mask = 0xf, >- .vmhub = AMDGPU_MMHUB, >+ .vmhub = AMDGPU_MMHUB_0, > .get_rptr = vcn_v2_0_jpeg_ring_get_rptr, > .get_wptr = vcn_v2_0_jpeg_ring_get_wptr, > .set_wptr = vcn_v2_0_jpeg_ring_set_wptr, >@@ -2247,7 +2260,7 @@ > > static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) > { >- adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; >+ adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; > DRM_INFO("VCN decode is enabled in VM mode\n"); > } > >@@ -2256,14 +2269,14 @@ > int i; > > for (i = 0; i < adev->vcn.num_enc_rings; ++i) >- adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; >+ adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; > > DRM_INFO("VCN encode is enabled in VM mode\n"); > } > > static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) > { >- adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; >+ adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; > DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); > } > >@@ -2274,8 +2287,8 @@ > > static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev) > { >- adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; >- adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs; >+ adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; >+ adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs; > } > > const struct amdgpu_ip_block_version vcn_v2_0_ip_block = >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h 2019-08-31 15:01:11.851736168 -0500 >@@ -24,6 +24,44 @@ > #ifndef __VCN_V2_0_H__ > #define __VCN_V2_0_H__ > >+extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); >+extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); >+extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); >+extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, >+ unsigned flags); >+extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, >+ struct amdgpu_ib *ib, uint32_t flags); >+extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >+ uint32_t val, uint32_t mask); >+extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, >+ unsigned vmid, uint64_t pd_addr); >+extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, >+ uint32_t reg, uint32_t val); >+ >+extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring); >+extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, >+ u64 seq, unsigned flags); >+extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, >+ struct amdgpu_ib *ib, uint32_t flags); >+extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >+ uint32_t val, uint32_t mask); >+extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, >+ unsigned int vmid, uint64_t pd_addr); >+extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring 
*ring, uint32_t reg, uint32_t val); >+ >+extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring); >+extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring); >+extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, >+ unsigned flags); >+extern void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, >+ struct amdgpu_ib *ib, uint32_t flags); >+extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >+ uint32_t val, uint32_t mask); >+extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, >+ unsigned vmid, uint64_t pd_addr); >+extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); >+extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count); >+ > extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block; > > #endif /* __VCN_V2_0_H__ */ >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 2019-08-31 15:01:11.852736168 -0500 >@@ -0,0 +1,1414 @@ >+/* >+ * Copyright 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. 
>+ * >+ */ >+ >+#include <linux/firmware.h> >+ >+#include "amdgpu.h" >+#include "amdgpu_vcn.h" >+#include "soc15.h" >+#include "soc15d.h" >+#include "vcn_v2_0.h" >+ >+#include "vcn/vcn_2_5_offset.h" >+#include "vcn/vcn_2_5_sh_mask.h" >+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" >+ >+#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 >+#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f >+#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10 >+#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11 >+#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29 >+#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66 >+#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d >+ >+#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431 >+#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4 >+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 >+#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c >+ >+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f >+ >+#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 >+ >+static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); >+static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); >+static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev); >+static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); >+static int vcn_v2_5_set_powergating_state(void *handle, >+ enum amd_powergating_state state); >+ >+static int amdgpu_ih_clientid_vcns[] = { >+ SOC15_IH_CLIENTID_VCN, >+ SOC15_IH_CLIENTID_VCN1 >+}; >+ >+/** >+ * vcn_v2_5_early_init - set function pointers >+ * >+ * @handle: amdgpu_device pointer >+ * >+ * Set ring and irq function pointers >+ */ >+static int vcn_v2_5_early_init(void *handle) >+{ >+ struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ if (adev->asic_type == CHIP_ARCTURUS) { >+ u32 harvest; >+ int i; >+ >+ adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS; >+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { >+ harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING); >+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) >+ adev->vcn.harvest_config |= 1 << i; >+ } >+ >+ if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | >+ AMDGPU_VCN_HARVEST_VCN1)) >+ /* both instances are harvested, disable the block */ >+ return -ENOENT; >+ } else >+ adev->vcn.num_vcn_inst = 1; >+ >+ adev->vcn.num_enc_rings = 2; >+ >+ vcn_v2_5_set_dec_ring_funcs(adev); >+ vcn_v2_5_set_enc_ring_funcs(adev); >+ vcn_v2_5_set_jpeg_ring_funcs(adev); >+ vcn_v2_5_set_irq_funcs(adev); >+ >+ return 0; >+} >+ >+/** >+ * vcn_v2_5_sw_init - sw init for VCN block >+ * >+ * @handle: amdgpu_device pointer >+ * >+ * Load firmware and sw initialization >+ */ >+static int vcn_v2_5_sw_init(void *handle) >+{ >+ struct amdgpu_ring *ring; >+ int i, j, r; >+ struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ >+ for (j = 0; j < adev->vcn.num_vcn_inst; j++) { >+ if (adev->vcn.harvest_config & (1 << j)) >+ continue; >+ /* VCN DEC TRAP */ >+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], >+ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq); >+ if (r) >+ return r; >+ >+ /* VCN ENC TRAP */ >+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], >+ i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq); >+ if (r) >+ return r; >+ } >+ >+ /* VCN JPEG TRAP */ >+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], >+ VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq); >+ if (r) >+ return r; >+ } >+ >+ r = amdgpu_vcn_sw_init(adev); >+ if (r) >+ return r; >+ >+ if 
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { >+ const struct common_firmware_header *hdr; >+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data; >+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; >+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; >+ adev->firmware.fw_size += >+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); >+ >+ if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { >+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; >+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; >+ adev->firmware.fw_size += >+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); >+ } >+ DRM_INFO("PSP loading VCN firmware\n"); >+ } >+ >+ r = amdgpu_vcn_resume(adev); >+ if (r) >+ return r; >+ >+ for (j = 0; j < adev->vcn.num_vcn_inst; j++) { >+ if (adev->vcn.harvest_config & (1 << j)) >+ continue; >+ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; >+ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; >+ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; >+ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; >+ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; >+ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; >+ >+ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; >+ adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); >+ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; >+ adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); >+ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; >+ adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); >+ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; >+ adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); >+ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; >+ adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); >+ >+ adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; >+ adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH); >+ >+ ring = &adev->vcn.inst[j].ring_dec; >+ ring->use_doorbell = true; >+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j; >+ sprintf(ring->name, "vcn_dec_%d", j); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); >+ if (r) >+ return r; >+ >+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >+ ring = &adev->vcn.inst[j].ring_enc[i]; >+ ring->use_doorbell = true; >+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j; >+ sprintf(ring->name, "vcn_enc_%d.%d", j, i); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); >+ if (r) >+ return r; >+ } >+ >+ ring = &adev->vcn.inst[j].ring_jpeg; >+ ring->use_doorbell = true; >+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j; >+ sprintf(ring->name, "vcn_jpeg_%d", j); >+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); >+ if (r) >+ return r; >+ } >+ >+ return 0; >+} >+ >+/** >+ * vcn_v2_5_sw_fini - sw fini for VCN block >+ * >+ * @handle: amdgpu_device pointer >+ * >+ * VCN suspend and free up sw allocation >+ */ >+static int vcn_v2_5_sw_fini(void *handle) >+{ >+ int r; >+ struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ >+ r = amdgpu_vcn_suspend(adev); >+ if (r) >+ return r; >+ >+ 
r = amdgpu_vcn_sw_fini(adev);
>+
>+ return r;
>+}
>+
>+/**
>+ * vcn_v2_5_hw_init - start and test VCN block
>+ *
>+ * @handle: amdgpu_device pointer
>+ *
>+ * Initialize the hardware, boot up the VCPU and do some testing
>+ */
>+static int vcn_v2_5_hw_init(void *handle)
>+{
>+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>+ struct amdgpu_ring *ring;
>+ int i, j, r;
>+
>+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>+ if (adev->vcn.harvest_config & (1 << j))
>+ continue;
>+ ring = &adev->vcn.inst[j].ring_dec;
>+
>+ adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
>+ ring->doorbell_index, j);
>+
>+ r = amdgpu_ring_test_ring(ring);
>+ if (r) {
>+ ring->sched.ready = false;
>+ goto done;
>+ }
>+
>+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
>+ ring = &adev->vcn.inst[j].ring_enc[i];
>+ ring->sched.ready = false;
>+ continue; /* enc ring tests are skipped for now */
>+ r = amdgpu_ring_test_ring(ring);
>+ if (r) {
>+ ring->sched.ready = false;
>+ goto done;
>+ }
>+ }
>+
>+ ring = &adev->vcn.inst[j].ring_jpeg;
>+ r = amdgpu_ring_test_ring(ring);
>+ if (r) {
>+ ring->sched.ready = false;
>+ goto done;
>+ }
>+ }
>+done:
>+ if (!r)
>+ DRM_INFO("VCN decode and encode initialized successfully.\n");
>+
>+ return r;
>+}
>+
>+/**
>+ * vcn_v2_5_hw_fini - stop the hardware block
>+ *
>+ * @handle: amdgpu_device pointer
>+ *
>+ * Stop the VCN block, mark the rings as not ready any more
>+ */
>+static int vcn_v2_5_hw_fini(void *handle)
>+{
>+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>+ struct amdgpu_ring *ring;
>+ int i, j;
>+
>+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>+ if (adev->vcn.harvest_config & (1 << i))
>+ continue;
>+ ring = &adev->vcn.inst[i].ring_dec;
>+
>+ if (RREG32_SOC15(VCN, i, mmUVD_STATUS))
>+ vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
>+
>+ ring->sched.ready = false;
>+
>+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
>+ ring = &adev->vcn.inst[i].ring_enc[j];
>+ ring->sched.ready = false;
>+ }
>+
>+ ring = &adev->vcn.inst[i].ring_jpeg;
>+ ring->sched.ready = false;
>+ }
>+
>+ return 0;
>+}
>+
>+/**
>+ * vcn_v2_5_suspend - suspend VCN block
>+ *
>+ * @handle: amdgpu_device pointer
>+ *
>+ * HW fini and suspend VCN block
>+ */
>+static int vcn_v2_5_suspend(void *handle)
>+{
>+ int r;
>+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>+
>+ r = vcn_v2_5_hw_fini(adev);
>+ if (r)
>+ return r;
>+
>+ r = amdgpu_vcn_suspend(adev);
>+
>+ return r;
>+}
>+
>+/**
>+ * vcn_v2_5_resume - resume VCN block
>+ *
>+ * @handle: amdgpu_device pointer
>+ *
>+ * Resume firmware and hw init VCN block
>+ */
>+static int vcn_v2_5_resume(void *handle)
>+{
>+ int r;
>+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>+
>+ r = amdgpu_vcn_resume(adev);
>+ if (r)
>+ return r;
>+
>+ r = vcn_v2_5_hw_init(adev);
>+
>+ return r;
>+}
>+
>+/**
>+ * vcn_v2_5_mc_resume - memory controller programming
>+ *
>+ * @adev: amdgpu_device pointer
>+ *
>+ * Let the VCN memory controller know its offsets
>+ */
>+static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
>+{
>+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
>+ uint32_t offset;
>+ int i;
>+
>+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>+ if (adev->vcn.harvest_config & (1 << i))
>+ continue;
>+ /* cache window 0: fw */
>+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
>+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
>+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
>+ 
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); >+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); >+ offset = 0; >+ } else { >+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, >+ lower_32_bits(adev->vcn.inst[i].gpu_addr)); >+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, >+ upper_32_bits(adev->vcn.inst[i].gpu_addr)); >+ offset = size; >+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, >+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3); >+ } >+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); >+ >+ /* cache window 1: stack */ >+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, >+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); >+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, >+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); >+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); >+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); >+ >+ /* cache window 2: context */ >+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, >+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); >+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, >+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); >+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); >+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); >+ } >+} >+ >+/** >+ * vcn_v2_5_disable_clock_gating - disable VCN clock gating >+ * >+ * @adev: amdgpu_device pointer >+ * @sw: enable SW clock gating >+ * >+ * Disable clock gating for VCN block >+ */ >+static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) >+{ >+ uint32_t data; >+ int ret = 0; >+ int i; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ /* UVD disable CGC */ >+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); >+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) >+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; >+ else >+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; >+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; >+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; >+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); >+ >+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); >+ data &= ~(UVD_CGC_GATE__SYS_MASK >+ | UVD_CGC_GATE__UDEC_MASK >+ | UVD_CGC_GATE__MPEG2_MASK >+ | UVD_CGC_GATE__REGS_MASK >+ | UVD_CGC_GATE__RBC_MASK >+ | UVD_CGC_GATE__LMI_MC_MASK >+ | UVD_CGC_GATE__LMI_UMC_MASK >+ | UVD_CGC_GATE__IDCT_MASK >+ | UVD_CGC_GATE__MPRD_MASK >+ | UVD_CGC_GATE__MPC_MASK >+ | UVD_CGC_GATE__LBSI_MASK >+ | UVD_CGC_GATE__LRBBM_MASK >+ | UVD_CGC_GATE__UDEC_RE_MASK >+ | UVD_CGC_GATE__UDEC_CM_MASK >+ | UVD_CGC_GATE__UDEC_IT_MASK >+ | UVD_CGC_GATE__UDEC_DB_MASK >+ | UVD_CGC_GATE__UDEC_MP_MASK >+ | UVD_CGC_GATE__WCB_MASK >+ | UVD_CGC_GATE__VCPU_MASK >+ | UVD_CGC_GATE__MMSCH_MASK); >+ >+ WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); >+ >+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); >+ >+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); >+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK >+ | UVD_CGC_CTRL__SYS_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_MODE_MASK >+ | UVD_CGC_CTRL__MPEG2_MODE_MASK >+ | UVD_CGC_CTRL__REGS_MODE_MASK >+ | UVD_CGC_CTRL__RBC_MODE_MASK >+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK >+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK >+ | UVD_CGC_CTRL__IDCT_MODE_MASK >+ | UVD_CGC_CTRL__MPRD_MODE_MASK >+ | UVD_CGC_CTRL__MPC_MODE_MASK >+ | 
UVD_CGC_CTRL__LBSI_MODE_MASK >+ | UVD_CGC_CTRL__LRBBM_MODE_MASK >+ | UVD_CGC_CTRL__WCB_MODE_MASK >+ | UVD_CGC_CTRL__VCPU_MODE_MASK >+ | UVD_CGC_CTRL__MMSCH_MODE_MASK); >+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); >+ >+ /* turn on */ >+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); >+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK >+ | UVD_SUVD_CGC_GATE__SIT_MASK >+ | UVD_SUVD_CGC_GATE__SMP_MASK >+ | UVD_SUVD_CGC_GATE__SCM_MASK >+ | UVD_SUVD_CGC_GATE__SDB_MASK >+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK >+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK >+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK >+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK >+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK >+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK >+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK >+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK >+ | UVD_SUVD_CGC_GATE__SCLR_MASK >+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK >+ | UVD_SUVD_CGC_GATE__ENT_MASK >+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK >+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK >+ | UVD_SUVD_CGC_GATE__SITE_MASK >+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK >+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK >+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK >+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK >+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); >+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); >+ >+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); >+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); >+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); >+ } >+} >+ >+/** >+ * vcn_v2_5_enable_clock_gating - enable VCN clock gating >+ * >+ * @adev: amdgpu_device pointer >+ * @sw: enable SW clock gating >+ * >+ * Enable clock gating for VCN block >+ */ >+static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) >+{ >+ uint32_t data = 0; >+ int i; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ /* enable UVD CGC */ >+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); >+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) >+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; >+ else >+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; >+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; >+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; >+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); >+ >+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); >+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK >+ | UVD_CGC_CTRL__SYS_MODE_MASK >+ | UVD_CGC_CTRL__UDEC_MODE_MASK >+ | UVD_CGC_CTRL__MPEG2_MODE_MASK >+ | UVD_CGC_CTRL__REGS_MODE_MASK >+ | UVD_CGC_CTRL__RBC_MODE_MASK >+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK >+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK >+ | UVD_CGC_CTRL__IDCT_MODE_MASK >+ | UVD_CGC_CTRL__MPRD_MODE_MASK >+ | UVD_CGC_CTRL__MPC_MODE_MASK >+ | UVD_CGC_CTRL__LBSI_MODE_MASK >+ | UVD_CGC_CTRL__LRBBM_MODE_MASK >+ | UVD_CGC_CTRL__WCB_MODE_MASK >+ | UVD_CGC_CTRL__VCPU_MODE_MASK); >+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); >+ >+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); >+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK >+ | 
UVD_SUVD_CGC_CTRL__SDB_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK >+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); >+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); >+ } >+} >+ >+/** >+ * jpeg_v2_5_start - start JPEG block >+ * >+ * @adev: amdgpu_device pointer >+ * >+ * Setup and start the JPEG block >+ */ >+static int jpeg_v2_5_start(struct amdgpu_device *adev) >+{ >+ struct amdgpu_ring *ring; >+ uint32_t tmp; >+ int i; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ ring = &adev->vcn.inst[i].ring_jpeg; >+ /* disable anti hang mechanism */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0, >+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); >+ >+ /* JPEG disable CGC */ >+ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); >+ tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; >+ tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; >+ tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; >+ WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); >+ >+ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); >+ tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK >+ | JPEG_CGC_GATE__JPEG2_DEC_MASK >+ | JPEG_CGC_GATE__JMCIF_MASK >+ | JPEG_CGC_GATE__JRBBM_MASK); >+ WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); >+ >+ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); >+ tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK >+ | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK >+ | JPEG_CGC_CTRL__JMCIF_MODE_MASK >+ | JPEG_CGC_CTRL__JRBBM_MODE_MASK); >+ WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); >+ >+ /* MJPEG global tiling registers */ >+ WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, >+ adev->gfx.config.gb_addr_config); >+ WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, >+ adev->gfx.config.gb_addr_config); >+ >+ /* enable JMI channel */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0, >+ ~UVD_JMI_CNTL__SOFT_RESET_MASK); >+ >+ /* enable System Interrupt for JRBC */ >+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN), >+ JPEG_SYS_INT_EN__DJRBC_MASK, >+ ~JPEG_SYS_INT_EN__DJRBC_MASK); >+ >+ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0); >+ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); >+ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, >+ lower_32_bits(ring->gpu_addr)); >+ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, >+ upper_32_bits(ring->gpu_addr)); >+ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0); >+ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0); >+ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); >+ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); >+ ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR); >+ } >+ >+ return 0; >+} >+ >+/** >+ * jpeg_v2_5_stop - stop JPEG block >+ * >+ * @adev: amdgpu_device pointer >+ * >+ * stop the JPEG block >+ */ >+static int jpeg_v2_5_stop(struct amdgpu_device *adev) >+{ >+ uint32_t tmp; >+ int i; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ /* reset JMI */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), >+ UVD_JMI_CNTL__SOFT_RESET_MASK, >+ ~UVD_JMI_CNTL__SOFT_RESET_MASK); >+ >+ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); >+ tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK >+ |JPEG_CGC_GATE__JPEG2_DEC_MASK >+ |JPEG_CGC_GATE__JMCIF_MASK >+ |JPEG_CGC_GATE__JRBBM_MASK); >+ WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); >+ >+ /* enable anti hang mechanism */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, 
mmUVD_JPEG_POWER_STATUS), >+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, >+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); >+ } >+ >+ return 0; >+} >+ >+static int vcn_v2_5_start(struct amdgpu_device *adev) >+{ >+ struct amdgpu_ring *ring; >+ uint32_t rb_bufsz, tmp; >+ int i, j, k, r; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ /* disable register anti-hang mechanism */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, >+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); >+ >+ /* set uvd status busy */ >+ tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; >+ WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); >+ } >+ >+ /*SW clock gating */ >+ vcn_v2_5_disable_clock_gating(adev); >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ /* enable VCPU clock */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), >+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); >+ >+ /* disable master interrupt */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, >+ ~UVD_MASTINT_EN__VCPU_EN_MASK); >+ >+ /* setup mmUVD_LMI_CTRL */ >+ tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); >+ tmp &= ~0xff; >+ WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8| >+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | >+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK | >+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | >+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); >+ >+ /* setup mmUVD_MPC_CNTL */ >+ tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); >+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; >+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; >+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); >+ >+ /* setup UVD_MPC_SET_MUXA0 */ >+ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, >+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | >+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | >+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | >+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); >+ >+ /* setup UVD_MPC_SET_MUXB0 */ >+ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, >+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | >+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | >+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | >+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); >+ >+ /* setup mmUVD_MPC_SET_MUX */ >+ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, >+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | >+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | >+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); >+ } >+ >+ vcn_v2_5_mc_resume(adev); >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ /* VCN global tiling registers */ >+ WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, >+ adev->gfx.config.gb_addr_config); >+ WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, >+ adev->gfx.config.gb_addr_config); >+ >+ /* enable LMI MC and UMC channels */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, >+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); >+ >+ /* unblock VCPU register access */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, >+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); >+ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, >+ ~UVD_VCPU_CNTL__BLK_RST_MASK); >+ >+ for (k = 0; k < 10; ++k) { >+ uint32_t status; >+ >+ for (j = 0; j < 100; ++j) { >+ status = RREG32_SOC15(UVD, i, mmUVD_STATUS); >+ if (status & 2) >+ break; >+ if (amdgpu_emu_mode == 1) >+ msleep(500); >+ else >+ mdelay(10); >+ } >+ r = 0; >+ if (status & 2) >+ break; >+ >+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); >+ 
WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
>+				UVD_VCPU_CNTL__BLK_RST_MASK,
>+				~UVD_VCPU_CNTL__BLK_RST_MASK);
>+			mdelay(10);
>+			WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
>+				~UVD_VCPU_CNTL__BLK_RST_MASK);
>+
>+			mdelay(10);
>+			r = -1;
>+		}
>+
>+		if (r) {
>+			DRM_ERROR("VCN decode not responding, giving up!!!\n");
>+			return r;
>+		}
>+
>+		/* enable master interrupt */
>+		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
>+			UVD_MASTINT_EN__VCPU_EN_MASK,
>+			~UVD_MASTINT_EN__VCPU_EN_MASK);
>+
>+		/* clear the busy bit of VCN_STATUS */
>+		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0,
>+			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
>+
>+		WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0);
>+
>+		ring = &adev->vcn.inst[i].ring_dec;
>+		/* force RBC into idle state */
>+		rb_bufsz = order_base_2(ring->ring_size);
>+		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
>+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
>+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
>+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
>+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
>+		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp);
>+
>+		/* program the RB_BASE for ring buffer */
>+		WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
>+			lower_32_bits(ring->gpu_addr));
>+		WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
>+			upper_32_bits(ring->gpu_addr));
>+
>+		/* Initialize the ring buffer's read and write pointers */
>+		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0);
>+
>+		ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR);
>+		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR,
>+			lower_32_bits(ring->wptr));
>+		ring = &adev->vcn.inst[i].ring_enc[0];
>+		WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
>+		WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
>+		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
>+		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
>+		WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4);
>+
>+		ring = &adev->vcn.inst[i].ring_enc[1];
>+		WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
>+		WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
>+		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
>+		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
>+		WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
>+	}
>+	r = jpeg_v2_5_start(adev);
>+
>+	return r;
>+}
>+
>+static int vcn_v2_5_stop(struct amdgpu_device *adev)
>+{
>+	uint32_t tmp;
>+	int i, r;
>+
>+	r = jpeg_v2_5_stop(adev);
>+	if (r)
>+		return r;
>+
>+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>+		if (adev->vcn.harvest_config & (1 << i))
>+			continue;
>+		/* wait for vcn idle */
>+		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
>+		if (r)
>+			return r;
>+
>+		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
>+			UVD_LMI_STATUS__READ_CLEAN_MASK |
>+			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
>+			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
>+		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
>+		if (r)
>+			return r;
>+
>+		/* block LMI UMC channel */
>+		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
>+		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
>+		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
>+
>+		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
>+			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
>+		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
>+		if (r)
>+			return r;
>+
>+		/* block VCPU register access */
>+		WREG32_P(SOC15_REG_OFFSET(UVD, i, 
mmUVD_RB_ARB_CTRL), >+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK, >+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); >+ >+ /* reset VCPU */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), >+ UVD_VCPU_CNTL__BLK_RST_MASK, >+ ~UVD_VCPU_CNTL__BLK_RST_MASK); >+ >+ /* disable VCPU clock */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, >+ ~(UVD_VCPU_CNTL__CLK_EN_MASK)); >+ >+ /* clear status */ >+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); >+ >+ vcn_v2_5_enable_clock_gating(adev); >+ >+ /* enable register anti-hang mechanism */ >+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), >+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, >+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); >+ } >+ >+ return 0; >+} >+ >+/** >+ * vcn_v2_5_dec_ring_get_rptr - get read pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Returns the current hardware read pointer >+ */ >+static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); >+} >+ >+/** >+ * vcn_v2_5_dec_ring_get_wptr - get write pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Returns the current hardware write pointer >+ */ >+static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ if (ring->use_doorbell) >+ return adev->wb.wb[ring->wptr_offs]; >+ else >+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); >+} >+ >+/** >+ * vcn_v2_5_dec_ring_set_wptr - set write pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Commits the write pointer to the hardware >+ */ >+static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ if (ring->use_doorbell) { >+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); >+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); >+ } else { >+ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); >+ } >+} >+ >+static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { >+ .type = AMDGPU_RING_TYPE_VCN_DEC, >+ .align_mask = 0xf, >+ .vmhub = AMDGPU_MMHUB_1, >+ .get_rptr = vcn_v2_5_dec_ring_get_rptr, >+ .get_wptr = vcn_v2_5_dec_ring_get_wptr, >+ .set_wptr = vcn_v2_5_dec_ring_set_wptr, >+ .emit_frame_size = >+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + >+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + >+ 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ >+ 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ >+ 6, >+ .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ >+ .emit_ib = vcn_v2_0_dec_ring_emit_ib, >+ .emit_fence = vcn_v2_0_dec_ring_emit_fence, >+ .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, >+ .test_ring = amdgpu_vcn_dec_ring_test_ring, >+ .test_ib = amdgpu_vcn_dec_ring_test_ib, >+ .insert_nop = vcn_v2_0_dec_ring_insert_nop, >+ .insert_start = vcn_v2_0_dec_ring_insert_start, >+ .insert_end = vcn_v2_0_dec_ring_insert_end, >+ .pad_ib = amdgpu_ring_generic_pad_ib, >+ .begin_use = amdgpu_vcn_ring_begin_use, >+ .end_use = amdgpu_vcn_ring_end_use, >+ .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, >+ .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, >+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, >+}; >+ >+/** >+ * vcn_v2_5_enc_ring_get_rptr - get enc read pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Returns the current hardware enc read pointer >+ */ >+static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) >+ return 
RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); >+ else >+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); >+} >+ >+/** >+ * vcn_v2_5_enc_ring_get_wptr - get enc write pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Returns the current hardware enc write pointer >+ */ >+static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { >+ if (ring->use_doorbell) >+ return adev->wb.wb[ring->wptr_offs]; >+ else >+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); >+ } else { >+ if (ring->use_doorbell) >+ return adev->wb.wb[ring->wptr_offs]; >+ else >+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); >+ } >+} >+ >+/** >+ * vcn_v2_5_enc_ring_set_wptr - set enc write pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Commits the enc write pointer to the hardware >+ */ >+static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { >+ if (ring->use_doorbell) { >+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); >+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); >+ } else { >+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); >+ } >+ } else { >+ if (ring->use_doorbell) { >+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); >+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); >+ } else { >+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); >+ } >+ } >+} >+ >+static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { >+ .type = AMDGPU_RING_TYPE_VCN_ENC, >+ .align_mask = 0x3f, >+ .nop = VCN_ENC_CMD_NO_OP, >+ .vmhub = AMDGPU_MMHUB_1, >+ .get_rptr = vcn_v2_5_enc_ring_get_rptr, >+ .get_wptr = vcn_v2_5_enc_ring_get_wptr, >+ .set_wptr = vcn_v2_5_enc_ring_set_wptr, >+ .emit_frame_size = >+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + >+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + >+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ >+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ >+ 1, /* vcn_v2_0_enc_ring_insert_end */ >+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ >+ .emit_ib = vcn_v2_0_enc_ring_emit_ib, >+ .emit_fence = vcn_v2_0_enc_ring_emit_fence, >+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, >+ .test_ring = amdgpu_vcn_enc_ring_test_ring, >+ .test_ib = amdgpu_vcn_enc_ring_test_ib, >+ .insert_nop = amdgpu_ring_insert_nop, >+ .insert_end = vcn_v2_0_enc_ring_insert_end, >+ .pad_ib = amdgpu_ring_generic_pad_ib, >+ .begin_use = amdgpu_vcn_ring_begin_use, >+ .end_use = amdgpu_vcn_ring_end_use, >+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, >+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, >+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, >+}; >+ >+/** >+ * vcn_v2_5_jpeg_ring_get_rptr - get read pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Returns the current hardware read pointer >+ */ >+static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR); >+} >+ >+/** >+ * vcn_v2_5_jpeg_ring_get_wptr - get write pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Returns the current hardware write pointer >+ */ >+static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ if (ring->use_doorbell) >+ return adev->wb.wb[ring->wptr_offs]; >+ else >+ return RREG32_SOC15(UVD, 
ring->me, mmUVD_JRBC_RB_WPTR); >+} >+ >+/** >+ * vcn_v2_5_jpeg_ring_set_wptr - set write pointer >+ * >+ * @ring: amdgpu_ring pointer >+ * >+ * Commits the write pointer to the hardware >+ */ >+static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring) >+{ >+ struct amdgpu_device *adev = ring->adev; >+ >+ if (ring->use_doorbell) { >+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); >+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); >+ } else { >+ WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); >+ } >+} >+ >+static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = { >+ .type = AMDGPU_RING_TYPE_VCN_JPEG, >+ .align_mask = 0xf, >+ .vmhub = AMDGPU_MMHUB_1, >+ .get_rptr = vcn_v2_5_jpeg_ring_get_rptr, >+ .get_wptr = vcn_v2_5_jpeg_ring_get_wptr, >+ .set_wptr = vcn_v2_5_jpeg_ring_set_wptr, >+ .emit_frame_size = >+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + >+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + >+ 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ >+ 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ >+ 8 + 16, >+ .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */ >+ .emit_ib = vcn_v2_0_jpeg_ring_emit_ib, >+ .emit_fence = vcn_v2_0_jpeg_ring_emit_fence, >+ .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, >+ .test_ring = amdgpu_vcn_jpeg_ring_test_ring, >+ .test_ib = amdgpu_vcn_jpeg_ring_test_ib, >+ .insert_nop = vcn_v2_0_jpeg_ring_nop, >+ .insert_start = vcn_v2_0_jpeg_ring_insert_start, >+ .insert_end = vcn_v2_0_jpeg_ring_insert_end, >+ .pad_ib = amdgpu_ring_generic_pad_ib, >+ .begin_use = amdgpu_vcn_ring_begin_use, >+ .end_use = amdgpu_vcn_ring_end_use, >+ .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, >+ .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, >+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, >+}; >+ >+static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) >+{ >+ int i; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; >+ adev->vcn.inst[i].ring_dec.me = i; >+ DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); >+ } >+} >+ >+static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) >+{ >+ int i, j; >+ >+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { >+ if (adev->vcn.harvest_config & (1 << j)) >+ continue; >+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { >+ adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; >+ adev->vcn.inst[j].ring_enc[i].me = j; >+ } >+ DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); >+ } >+} >+ >+static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev) >+{ >+ int i; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs; >+ adev->vcn.inst[i].ring_jpeg.me = i; >+ DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i); >+ } >+} >+ >+static bool vcn_v2_5_is_idle(void *handle) >+{ >+ struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ int i, ret = 1; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); >+ } >+ >+ return ret; >+} >+ >+static int vcn_v2_5_wait_for_idle(void *handle) >+{ >+ struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ int i, ret = 0; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if 
(adev->vcn.harvest_config & (1 << i)) >+ continue; >+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, >+ UVD_STATUS__IDLE, ret); >+ if (ret) >+ return ret; >+ } >+ >+ return ret; >+} >+ >+static int vcn_v2_5_set_clockgating_state(void *handle, >+ enum amd_clockgating_state state) >+{ >+ struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false; >+ >+ if (enable) { >+ if (vcn_v2_5_is_idle(handle)) >+ return -EBUSY; >+ vcn_v2_5_enable_clock_gating(adev); >+ } else { >+ vcn_v2_5_disable_clock_gating(adev); >+ } >+ >+ return 0; >+} >+ >+static int vcn_v2_5_set_powergating_state(void *handle, >+ enum amd_powergating_state state) >+{ >+ struct amdgpu_device *adev = (struct amdgpu_device *)handle; >+ int ret; >+ >+ if(state == adev->vcn.cur_state) >+ return 0; >+ >+ if (state == AMD_PG_STATE_GATE) >+ ret = vcn_v2_5_stop(adev); >+ else >+ ret = vcn_v2_5_start(adev); >+ >+ if(!ret) >+ adev->vcn.cur_state = state; >+ >+ return ret; >+} >+ >+static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev, >+ struct amdgpu_irq_src *source, >+ unsigned type, >+ enum amdgpu_interrupt_state state) >+{ >+ return 0; >+} >+ >+static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, >+ struct amdgpu_irq_src *source, >+ struct amdgpu_iv_entry *entry) >+{ >+ uint32_t ip_instance; >+ >+ switch (entry->client_id) { >+ case SOC15_IH_CLIENTID_VCN: >+ ip_instance = 0; >+ break; >+ case SOC15_IH_CLIENTID_VCN1: >+ ip_instance = 1; >+ break; >+ default: >+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id); >+ return 0; >+ } >+ >+ DRM_DEBUG("IH: VCN TRAP\n"); >+ >+ switch (entry->src_id) { >+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: >+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); >+ break; >+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: >+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); >+ break; >+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: >+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); >+ break; >+ case VCN_2_0__SRCID__JPEG_DECODE: >+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg); >+ break; >+ default: >+ DRM_ERROR("Unhandled interrupt: %d %d\n", >+ entry->src_id, entry->src_data[0]); >+ break; >+ } >+ >+ return 0; >+} >+ >+static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = { >+ .set = vcn_v2_5_set_interrupt_state, >+ .process = vcn_v2_5_process_interrupt, >+}; >+ >+static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) >+{ >+ int i; >+ >+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { >+ if (adev->vcn.harvest_config & (1 << i)) >+ continue; >+ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2; >+ adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; >+ } >+} >+ >+static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { >+ .name = "vcn_v2_5", >+ .early_init = vcn_v2_5_early_init, >+ .late_init = NULL, >+ .sw_init = vcn_v2_5_sw_init, >+ .sw_fini = vcn_v2_5_sw_fini, >+ .hw_init = vcn_v2_5_hw_init, >+ .hw_fini = vcn_v2_5_hw_fini, >+ .suspend = vcn_v2_5_suspend, >+ .resume = vcn_v2_5_resume, >+ .is_idle = vcn_v2_5_is_idle, >+ .wait_for_idle = vcn_v2_5_wait_for_idle, >+ .check_soft_reset = NULL, >+ .pre_soft_reset = NULL, >+ .soft_reset = NULL, >+ .post_soft_reset = NULL, >+ .set_clockgating_state = vcn_v2_5_set_clockgating_state, >+ .set_powergating_state = vcn_v2_5_set_powergating_state, >+}; >+ >+const struct amdgpu_ip_block_version vcn_v2_5_ip_block = >+{ >+ .type = AMD_IP_BLOCK_TYPE_VCN, >+ .major = 2, >+ .minor = 5, >+ 
.rev = 0, >+ .funcs = &vcn_v2_5_ip_funcs, >+}; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h 2019-08-31 15:01:11.852736168 -0500 >@@ -0,0 +1,29 @@ >+/* >+ * Copyright 2019 Advanced Micro Devices, Inc. >+ * >+ * Permission is hereby granted, free of charge, to any person obtaining a >+ * copy of this software and associated documentation files (the "Software"), >+ * to deal in the Software without restriction, including without limitation >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, >+ * and/or sell copies of the Software, and to permit persons to whom the >+ * Software is furnished to do so, subject to the following conditions: >+ * >+ * The above copyright notice and this permission notice shall be included in >+ * all copies or substantial portions of the Software. >+ * >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >+ * OTHER DEALINGS IN THE SOFTWARE. >+ * >+ */ >+ >+#ifndef __VCN_V2_5_H__ >+#define __VCN_V2_5_H__ >+ >+extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; >+ >+#endif /* __VCN_V2_5_H__ */ >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega10_ih.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega10_ih.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 2019-08-31 15:01:11.852736168 -0500 >@@ -50,7 +50,7 @@ > > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { > DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); > return; >@@ -64,7 +64,7 @@ > ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, > RB_ENABLE, 1); >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, > ih_rb_cntl)) { > DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); >@@ -80,7 +80,7 @@ > ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, > RB_ENABLE, 1); >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, > ih_rb_cntl)) { > DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); >@@ -106,7 +106,7 @@ > > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { > DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); > return; >@@ -125,7 +125,7 @@ > ih_rb_cntl = 
RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, > RB_ENABLE, 0); >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, > ih_rb_cntl)) { > DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); >@@ -145,7 +145,7 @@ > ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, > RB_ENABLE, 0); >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, > ih_rb_cntl)) { > DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); >@@ -219,7 +219,7 @@ > static int vega10_ih_irq_init(struct amdgpu_device *adev) > { > struct amdgpu_ih_ring *ih; >- u32 ih_rb_cntl; >+ u32 ih_rb_cntl, ih_chicken; > int ret = 0; > u32 tmp; > >@@ -234,11 +234,17 @@ > WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff); > > ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); >+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); > ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); >+ if (adev->irq.ih.use_bus_addr) { >+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); >+ } else { >+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_FBPA_ENABLE, 1); >+ } > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, > !!adev->irq.msi_enabled); > >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { > DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); > return -ETIMEDOUT; >@@ -247,6 +253,11 @@ > WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); > } > >+ if ((adev->asic_type == CHIP_ARCTURUS >+ && adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) >+ || adev->asic_type == CHIP_RENOIR) >+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); >+ > /* set the writeback address whether it's enabled or not */ > WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, > lower_32_bits(ih->wptr_addr)); >@@ -272,7 +283,7 @@ > WPTR_OVERFLOW_ENABLE, 0); > ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, > RB_FULL_DRAIN_ENABLE, 1); >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, > ih_rb_cntl)) { > DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); >@@ -299,7 +310,7 @@ > ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); > ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); > >- if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { >+ if (amdgpu_sriov_vf(adev)) { > if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, > ih_rb_cntl)) { > DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 2019-08-31 15:01:11.852736168 -0500 >@@ -81,6 +81,10 @@ > adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; > adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; > adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; >+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL64_VCN0_1; >+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_DOORBELL64_VCN2_3; >+ adev->doorbell_index.vcn.vcn_ring4_5 = 
AMDGPU_DOORBELL64_VCN4_5; >+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_DOORBELL64_VCN6_7; > > adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP; > adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 2019-08-31 15:01:11.852736168 -0500 >@@ -50,6 +50,8 @@ > adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); > adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); > adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); >+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); >+ adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); > } > return 0; > } >@@ -85,6 +87,10 @@ > adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; > adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; > adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; >+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1; >+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3; >+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5; >+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7; > > adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP; > adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP; >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vi.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vi.c >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vi.c 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vi.c 2019-08-31 15:01:11.852736168 -0500 >@@ -711,6 +711,12 @@ > return r; > } > >+static enum amd_reset_method >+vi_asic_reset_method(struct amdgpu_device *adev) >+{ >+ return AMD_RESET_METHOD_LEGACY; >+} >+ > static u32 vi_get_config_memsize(struct amdgpu_device *adev) > { > return RREG32(mmCONFIG_MEMSIZE); >@@ -1023,6 +1029,7 @@ > .read_bios_from_rom = &vi_read_bios_from_rom, > .read_register = &vi_read_register, > .reset = &vi_asic_reset, >+ .reset_method = &vi_asic_reset_method, > .set_vga_state = &vi_vga_set_state, > .get_xclk = &vi_get_xclk, > .set_uvd_clocks = &vi_set_uvd_clocks, >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm 2019-08-31 15:01:11.853736168 -0500 >@@ -20,1105 +20,947 @@ > * OTHER DEALINGS IN THE SOFTWARE. 
> */ > >+var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 >+var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 >+var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 >+var SQ_WAVE_STATUS_HALT_MASK = 0x2000 > >-shader main >- >-asic(DEFAULT) >- >-type(CS) >+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 >+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 >+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 >+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 >+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 >+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4 >+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 >+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 >+var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11 >+var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1 >+ >+var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 >+var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF >+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 >+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 >+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 >+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF >+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 >+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 >+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 >+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 >+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 >+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 >+ >+var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 >+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 >+var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT = 25 >+var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE = 1 >+var SQ_WAVE_IB_STS_REPLAY_W64H_MASK = 0x02000000 >+var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 >+var SQ_WAVE_IB_STS_RCNT_SIZE = 6 >+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000 >+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF >+ >+var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 >+var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 >+ >+// bits [31:24] unused by SPI debug data >+var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 >+var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000 >+var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 24 >+var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0x7F000000 >+ >+// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] >+// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE >+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 >+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC >+ >+var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 >+var S_SAVE_SPI_INIT_ATC_SHIFT = 27 >+var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 >+var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 >+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 >+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 >+ >+var S_SAVE_PC_HI_RCNT_SHIFT = 26 >+var S_SAVE_PC_HI_RCNT_MASK = 0xFC000000 >+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 25 >+var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x02000000 >+var S_SAVE_PC_HI_REPLAY_W64H_SHIFT = 24 >+var S_SAVE_PC_HI_REPLAY_W64H_MASK = 0x01000000 >+ >+var s_sgpr_save_num = 108 >+ >+var s_save_spi_init_lo = exec_lo >+var s_save_spi_init_hi = exec_hi >+var s_save_pc_lo = ttmp0 >+var s_save_pc_hi = ttmp1 >+var s_save_exec_lo = ttmp2 >+var s_save_exec_hi = ttmp3 >+var s_save_status = ttmp12 >+var s_save_trapsts = ttmp5 >+var s_save_xnack_mask = ttmp6 >+var s_wave_size = ttmp7 >+var s_save_buf_rsrc0 = ttmp8 >+var s_save_buf_rsrc1 = ttmp9 >+var s_save_buf_rsrc2 = ttmp10 >+var s_save_buf_rsrc3 = ttmp11 >+var s_save_mem_offset = ttmp14 >+var s_save_alloc_size = s_save_trapsts >+var s_save_tmp = s_save_buf_rsrc2 >+var s_save_m0 = ttmp15 >+ >+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE >+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC >+ >+var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 >+var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 >+var 
S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 >+var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 >+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 >+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 >+var S_WAVE_SIZE = 25 >+ >+var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT >+var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK >+var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT >+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK >+ >+var s_restore_spi_init_lo = exec_lo >+var s_restore_spi_init_hi = exec_hi >+var s_restore_mem_offset = ttmp12 >+var s_restore_alloc_size = ttmp3 >+var s_restore_tmp = ttmp6 >+var s_restore_mem_offset_save = s_restore_tmp >+var s_restore_m0 = s_restore_alloc_size >+var s_restore_mode = ttmp7 >+var s_restore_flat_scratch = ttmp2 >+var s_restore_pc_lo = ttmp0 >+var s_restore_pc_hi = ttmp1 >+var s_restore_exec_lo = ttmp14 >+var s_restore_exec_hi = ttmp15 >+var s_restore_status = ttmp4 >+var s_restore_trapsts = ttmp5 >+var s_restore_xnack_mask = ttmp13 >+var s_restore_buf_rsrc0 = ttmp8 >+var s_restore_buf_rsrc1 = ttmp9 >+var s_restore_buf_rsrc2 = ttmp10 >+var s_restore_buf_rsrc3 = ttmp11 >+var s_restore_size = ttmp7 > >-wave_size(32) >-/*************************************************************************/ >-/* control on how to run the shader */ >-/*************************************************************************/ >-//any hack that needs to be made to run this code in EMU (either becasue various EMU code are not ready or no compute save & restore in EMU run) >-var EMU_RUN_HACK = 0 >-var EMU_RUN_HACK_RESTORE_NORMAL = 0 >-var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 >-var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 >-var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK >-var SAVE_LDS = 0 >-var WG_BASE_ADDR_LO = 0x9000a000 >-var WG_BASE_ADDR_HI = 0x0 >-var WAVE_SPACE = 0x9000 //memory size that each wave occupies in workgroup state mem, increase from 5000 to 9000 for more SGPR need to be saved >-var CTX_SAVE_CONTROL = 0x0 >-var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL >-var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either becasue various RTL code are not ready or no compute save & restore in RTL run) >-var SGPR_SAVE_USE_SQC = 0 //use SQC D$ to do the write >-var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //need to change BUF_DATA_FORMAT in S_SAVE_BUF_RSRC_WORD3_MISC from 0 to BUF_DATA_FORMAT_32 if set to 1 (i.e. 
0x00827FAC) >-var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing >-var SAVE_RESTORE_HWID_DDID = 0 >-var RESTORE_DDID_IN_SGPR18 = 0 >-/**************************************************************************/ >-/* variables */ >-/**************************************************************************/ >-var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 >-var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 >-var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 >+shader main >+ asic(DEFAULT) >+ type(CS) >+ wave_size(32) > >-var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 >-var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 >-var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 >-var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 >-var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 >-var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits >-var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 >-var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 >-var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11 >-var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1 >- >-var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 >-var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask >-var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 >-var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 >-var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 >-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF >-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 >-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 >-var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 >-var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 >-var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 >- >-var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME >-var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME >-var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 //FIXME >-var SQ_WAVE_IB_STS_RCNT_SIZE = 6 //FIXME >-var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME >- >-var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 >-var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 >- >- >-/* Save */ >-var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes >-var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE >- >-var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit >-var S_SAVE_SPI_INIT_ATC_SHIFT = 27 >-var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype >-var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 >-var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG >-var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 >- >-var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used >-var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME >-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME >-var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME >- >-var s_save_spi_init_lo = exec_lo >-var s_save_spi_init_hi = exec_hi >- >-var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} >-var s_save_pc_hi = ttmp1 >-var s_save_exec_lo = ttmp2 >-var s_save_exec_hi = ttmp3 >-var s_save_status = ttmp4 >-var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine >-var s_wave_size = ttmp6 //ttmp6 is not needed now, since it's only 32bit xnack mask, now use it to determine wave32 or wave64 in EMU_HACK >-var s_save_xnack_mask = ttmp7 >-var s_save_buf_rsrc0 = ttmp8 >-var s_save_buf_rsrc1 = ttmp9 >-var s_save_buf_rsrc2 = ttmp10 >-var s_save_buf_rsrc3 = ttmp11 >- >-var s_save_mem_offset = ttmp14 >-var s_sgpr_save_num = 106 //in gfx10, all sgpr must be saved >-var 
s_save_alloc_size = s_save_trapsts //conflict >-var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) >-var s_save_m0 = ttmp15 >- >-/* Restore */ >-var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE >-var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC >- >-var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit >-var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 >-var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype >-var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 >-var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG >-var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 >- >-var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT >-var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK >-var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT >-var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK >- >-var s_restore_spi_init_lo = exec_lo >-var s_restore_spi_init_hi = exec_hi >- >-var s_restore_mem_offset = ttmp12 >-var s_restore_alloc_size = ttmp3 >-var s_restore_tmp = ttmp6 >-var s_restore_mem_offset_save = s_restore_tmp //no conflict >- >-var s_restore_m0 = s_restore_alloc_size //no conflict >- >-var s_restore_mode = ttmp13 >-var s_restore_hwid1 = ttmp2 >-var s_restore_ddid = s_restore_hwid1 >-var s_restore_pc_lo = ttmp0 >-var s_restore_pc_hi = ttmp1 >-var s_restore_exec_lo = ttmp14 >-var s_restore_exec_hi = ttmp15 >-var s_restore_status = ttmp4 >-var s_restore_trapsts = ttmp5 >-//var s_restore_xnack_mask_lo = xnack_mask_lo >-//var s_restore_xnack_mask_hi = xnack_mask_hi >-var s_restore_xnack_mask = ttmp7 >-var s_restore_buf_rsrc0 = ttmp8 >-var s_restore_buf_rsrc1 = ttmp9 >-var s_restore_buf_rsrc2 = ttmp10 >-var s_restore_buf_rsrc3 = ttmp11 >-var s_restore_size = ttmp13 //ttmp13 has no conflict >- >-/**************************************************************************/ >-/* trap handler entry points */ >-/**************************************************************************/ >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore >- //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC >- s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC >- s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. >- s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE >- //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE >- s_branch L_SKIP_RESTORE //NOT restore, SAVE actually >- else >- s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save >- end >+ s_branch L_SKIP_RESTORE //NOT restore. 
might be a regular trap or save > > L_JUMP_TO_RESTORE: >- s_branch L_RESTORE //restore >+ s_branch L_RESTORE > > L_SKIP_RESTORE: >- >- s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC >- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save >- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) >- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save >- s_cbranch_scc1 L_SAVE //this is the operation for save >- >- // ********* Handle non-CWSR traps ******************* >- if (!EMU_RUN_HACK) >- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) >- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception >- s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. >- s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 >- >- L_EXCP_CASE: >- s_and_b32 ttmp1, ttmp1, 0xFFFF >- s_rfe_b64 [ttmp0, ttmp1] >- end >- // ********* End handling of non-CWSR traps ******************* >- >-/**************************************************************************/ >-/* save routine */ >-/**************************************************************************/ >+ s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC >+ s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK >+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) >+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save >+ s_cbranch_scc1 L_SAVE >+ >+ // If STATUS.MEM_VIOL is asserted then halt the wave to prevent >+ // the exception raising again and blocking context save. >+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK >+ s_cbranch_scc0 L_FETCH_2ND_TRAP >+ s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK >+ >+L_FETCH_2ND_TRAP: >+ // Preserve and clear scalar XNACK state before issuing scalar loads. >+ // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into >+ // unused space ttmp11[31:24]. >+ s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK) >+ s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) >+ s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK >+ s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) >+ s_or_b32 ttmp11, ttmp11, ttmp3 >+ s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK >+ s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) >+ s_or_b32 ttmp11, ttmp11, ttmp3 >+ s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK) >+ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 >+ >+ // Read second-level TBA/TMA from first-level TMA and jump if available. 
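>+	// The two TMA halves are shifted left by 8 bits to form the byte
>+	// address of a 16-byte record: the second-level TBA sits at offset
>+	// 0x0 and the second-level TMA at offset 0x8, fetched by the two
>+	// s_load_dwordx2 instructions below.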
>+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) >+ // ttmp12 holds SQ_WAVE_STATUS >+ s_getreg_b32 ttmp4, hwreg(HW_REG_SHADER_TMA_LO) >+ s_getreg_b32 ttmp5, hwreg(HW_REG_SHADER_TMA_HI) >+ s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 >+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA >+ s_waitcnt lgkmcnt(0) >+ s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA >+ s_waitcnt lgkmcnt(0) >+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] >+ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set >+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler >+ >+L_NO_NEXT_TRAP: >+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) >+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK >+ s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. >+ s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 >+ s_addc_u32 ttmp1, ttmp1, 0 >+L_EXCP_CASE: >+ s_and_b32 ttmp1, ttmp1, 0xFFFF >+ >+ // Restore SQ_WAVE_IB_STS. >+ s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) >+ s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK >+ s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) >+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK >+ s_or_b32 ttmp2, ttmp2, ttmp3 >+ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 >+ >+ // Restore SQ_WAVE_STATUS. >+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 >+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 >+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status > >-L_SAVE: >- >+ s_rfe_b64 [ttmp0, ttmp1] >+ >+L_SAVE: > //check whether there is mem_viol >- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) >- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK >+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) >+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK > s_cbranch_scc0 L_NO_PC_REWIND >- >+ > //if so, need rewind PC assuming GDS operation gets NACKed >- s_mov_b32 s_save_tmp, 0 //clear mem_viol bit >- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit >- s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] >- s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 >- s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc >+ s_mov_b32 s_save_tmp, 0 >+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit >+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] >+ s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 >+ s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 > > L_NO_PC_REWIND: >- s_mov_b32 s_save_tmp, 0 //clear saveCtx bit >- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit >+ s_mov_b32 s_save_tmp, 0 >+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit > >- //s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK >- //s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi >- s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK) >- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT >- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT >- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp >- s_getreg_b32 s_save_tmp, 
hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY >- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT >- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp >- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS >- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG >+ s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK) >+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) >+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT >+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp >+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) >+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT >+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp >+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE) >+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT >+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp >+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS >+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG > > s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp >- >- /* inform SPI the readiness and wait for SPI's go signal */ >- s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI >- s_mov_b32 s_save_exec_hi, exec_hi >- s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive >- if (EMU_RUN_HACK) >- >- else >- s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC >- end >- >- L_SLEEP: >- s_sleep 0x2 >- >- if (EMU_RUN_HACK) >- >- else >- s_cbranch_execz L_SLEEP >- end >- >- >- /* setup Resource Contants */ >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) >- //calculate wd_addr using absolute thread id >- v_readlane_b32 s_save_tmp, v9, 0 >- //determine it is wave32 or wave64 >- s_getreg_b32 s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) >- s_cmp_eq_u32 s_wave_size, 0 >- s_cbranch_scc1 L_SAVE_WAVE32 >- s_lshr_b32 s_save_tmp, s_save_tmp, 6 //SAVE WAVE64 >- s_branch L_SAVE_CON >- L_SAVE_WAVE32: >- s_lshr_b32 s_save_tmp, s_save_tmp, 5 //SAVE WAVE32 >- L_SAVE_CON: >- s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE >- s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO >- s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI >- s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL >- else >- end >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) >- s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO >- s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI >- s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL >- else >- end >- >- >- s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo >- s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE >- s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited >- s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC >- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK >- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position >- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 
s_save_tmp //or ATC >- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK >- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position >- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE >- >- s_mov_b32 s_save_m0, m0 //save M0 >- >- /* global mem offset */ >- s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 >- s_getreg_b32 s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) //get wave_save_size >- s_or_b32 s_wave_size, s_save_spi_init_hi, s_wave_size //share s_wave_size with exec_hi >- >- /* save VGPRs */ >- ////////////////////////////// >- L_SAVE_VGPR: >- >- s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on >- s_and_b32 m0, s_wave_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI >- s_mov_b32 exec_hi, 0x00000000 >- s_branch L_SAVE_VGPR_NORMAL >- L_ENABLE_SAVE_VGPR_EXEC_HI: >- s_mov_b32 exec_hi, 0xFFFFFFFF >- L_SAVE_VGPR_NORMAL: >- s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size >- //for wave32 and wave64, the num of vgpr function is the same? >- s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 >- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible >- //determine it is wave32 or wave64 >- s_and_b32 m0, s_wave_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_SAVE_VGPR_WAVE64 >- >- //zhenxu added it for save vgpr for wave32 >- s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 7 //NUM_RECORDS in bytes (32 threads*4) >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else >- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_mov_b32 m0, 0x0 //VGPR initial index value =0 >- //s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 >- //s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later, doesn't need this in gfx10 >- >- L_SAVE_VGPR_WAVE32_LOOP: >- v_movrels_b32 v0, v0 //v0 = v[0+m0] >- >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else >- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- end >- >- s_add_u32 m0, m0, 1 //next vgpr index >- s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 //every buffer_store_dword does 128 bytes >- s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_VGPR_WAVE32_LOOP //VGPR save is complete? >- s_branch L_SAVE_LDS >- //save vgpr for wave32 ends >- >- L_SAVE_VGPR_WAVE64: >- s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
>- else >- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_mov_b32 m0, 0x0 //VGPR initial index value =0 >- //s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 >- //s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later, doesn't need this in gfx10 >- >- L_SAVE_VGPR_WAVE64_LOOP: >- v_movrels_b32 v0, v0 //v0 = v[0+m0] >- >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else >- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- end >- >- s_add_u32 m0, m0, 1 //next vgpr index >- s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //every buffer_store_dword does 256 bytes >- s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_VGPR_WAVE64_LOOP //VGPR save is complete? >- //s_set_gpr_idx_off >- // >- //Below part will be the save shared vgpr part (new for gfx10) >- s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size >- s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? >- s_cbranch_scc0 L_SAVE_LDS //no shared_vgpr used? jump to L_SAVE_LDS >- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) >- //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. >- //save shared_vgpr will start from the index of m0 >- s_add_u32 s_save_alloc_size, s_save_alloc_size, m0 >- s_mov_b32 exec_lo, 0xFFFFFFFF >- s_mov_b32 exec_hi, 0x00000000 >- L_SAVE_SHARED_VGPR_WAVE64_LOOP: >- v_movrels_b32 v0, v0 //v0 = v[0+m0] >- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- s_add_u32 m0, m0, 1 //next vgpr index >- s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 //every buffer_store_dword does 256 bytes >- s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete? >- >- /* save LDS */ >- ////////////////////////////// >- L_SAVE_LDS: >- >- //Only check the first wave need LDS >- /* the first wave in the threadgroup */ >- s_barrier //FIXME not performance-optimal "LDS is used? wait for other waves in the same TG" >- s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here >- s_cbranch_scc0 L_SAVE_SGPR >- >- s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on >- s_and_b32 m0, s_wave_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI >- s_mov_b32 exec_hi, 0x00000000 >- s_branch L_SAVE_LDS_NORMAL >- L_ENABLE_SAVE_LDS_EXEC_HI: >- s_mov_b32 exec_hi, 0xFFFFFFFF >- L_SAVE_LDS_NORMAL: >- s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size >- s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? >- s_cbranch_scc0 L_SAVE_SGPR //no lds used? 
jump to L_SAVE_VGPR >- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw >- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes >- s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else >- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- //load 0~63*4(byte address) to vgpr v15 >- v_mbcnt_lo_u32_b32 v0, -1, 0 >- v_mbcnt_hi_u32_b32 v0, -1, v0 >- v_mul_u32_u24 v0, 4, v0 >- >- s_and_b32 m0, s_wave_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_mov_b32 m0, 0x0 >- s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 >- >- L_SAVE_LDS_LOOP_W32: >- if (SAVE_LDS) >- ds_read_b32 v1, v0 >- s_waitcnt 0 //ensure data ready >- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- //buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 //save lds to memory doesn't exist in 10 >- end >- s_add_u32 m0, m0, 128 //every buffer_store_lds does 128 bytes >- s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 //mem offset increased by 128 bytes >- v_add_nc_u32 v0, v0, 128 >- s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete? >- s_branch L_SAVE_SGPR >- >- L_SAVE_LDS_LOOP_W64: >- if (SAVE_LDS) >- ds_read_b32 v1, v0 >- s_waitcnt 0 //ensure data ready >- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- //buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 //save lds to memory doesn't exist in 10 >- end >- s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes >- s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //mem offset increased by 256 bytes >- v_add_nc_u32 v0, v0, 256 >- s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete? >- >- >- /* save SGPRs */ >- ////////////////////////////// >- //s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size >- //s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 >- //s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) >- //s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //In gfx10, Number of SGPRs = (sgpr_size + 1) * 8 (non-zero value) >- L_SAVE_SGPR: >- //need to look at it is wave32 or wave64 >- s_and_b32 m0, s_wave_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_SAVE_SGPR_VMEM_WAVE64 >- if (SGPR_SAVE_USE_SQC) >- s_lshl_b32 s_save_buf_rsrc2, s_sgpr_save_num, 2 //NUM_RECORDS in bytes >- else >- s_lshl_b32 s_save_buf_rsrc2, s_sgpr_save_num, 7 //NUM_RECORDS in bytes (32 threads) >- end >- s_branch L_SAVE_SGPR_CONT >- L_SAVE_SGPR_VMEM_WAVE64: >- if (SGPR_SAVE_USE_SQC) >- s_lshl_b32 s_save_buf_rsrc2, s_sgpr_save_num, 2 //NUM_RECORDS in bytes >- else >- s_lshl_b32 s_save_buf_rsrc2, s_sgpr_save_num, 8 //NUM_RECORDS in bytes (64 threads) >- end >- L_SAVE_SGPR_CONT: >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
>- else >- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- //s_mov_b32 m0, 0x0 //SGPR initial index value =0 >- //s_nop 0x0 //Manually inserted wait states >- >- s_and_b32 m0, s_wave_size, 1 >- s_cmp_eq_u32 m0, 1 >- >- s_mov_b32 m0, 0x0 //SGPR initial index value =0 >- s_nop 0x0 //Manually inserted wait states >- >- s_cbranch_scc1 L_SAVE_SGPR_LOOP_WAVE64 >- >- L_SAVE_SGPR_LOOP_WAVE32: >- s_movrels_b32 s0, s0 //s0 = s[0+m0] >- //zhenxu, adding one more argument to save sgpr function, this is only for vmem, using sqc is not change >- write_sgpr_to_mem_wave32(s0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PV: the best performance should be using s_buffer_store_dwordx4 >- s_add_u32 m0, m0, 1 //next sgpr index >- s_cmp_lt_u32 m0, s_sgpr_save_num //scc = (m0 < s_sgpr_save_num) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_SGPR_LOOP_WAVE32 //SGPR save is complete? >- s_branch L_SAVE_HWREG >- >- L_SAVE_SGPR_LOOP_WAVE64: >- s_movrels_b32 s0, s0 //s0 = s[0+m0] >- //zhenxu, adding one more argument to save sgpr function, this is only for vmem, using sqc is not change >- write_sgpr_to_mem_wave64(s0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PV: the best performance should be using s_buffer_store_dwordx4 >- s_add_u32 m0, m0, 1 //next sgpr index >- s_cmp_lt_u32 m0, s_sgpr_save_num //scc = (m0 < s_sgpr_save_num) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_SGPR_LOOP_WAVE64 //SGPR save is complete? >- >- >- /* save HW registers */ >- ////////////////////////////// >- L_SAVE_HWREG: >- s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else >- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_and_b32 m0, s_wave_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_SAVE_HWREG_WAVE64 >- >- write_sgpr_to_mem_wave32(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //M0 >- >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) >- s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 >- s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over >- end >- >- write_sgpr_to_mem_wave32(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PC >- write_sgpr_to_mem_wave32(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) >- write_sgpr_to_mem_wave32(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //EXEC >- write_sgpr_to_mem_wave32(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) >- write_sgpr_to_mem_wave32(s_save_status, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //STATUS >- >- //s_save_trapsts conflicts with s_save_alloc_size >- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) >- write_sgpr_to_mem_wave32(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TRAPSTS >- >- //write_sgpr_to_mem_wave32(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_LO >- write_sgpr_to_mem_wave32(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_HI >- >- //use s_save_tmp would introduce conflict here between s_save_tmp and 
s_save_buf_rsrc2 >- s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE >- write_sgpr_to_mem_wave32(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) >- if(SAVE_RESTORE_HWID_DDID) >- s_getreg_b32 s_save_m0, hwreg(HW_REG_HW_ID1) //HW_ID1, handler records the SE/SA/WGP/SIMD/wave of the original wave >- write_sgpr_to_mem_wave32(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) >- end >- s_branch L_S_PGM_END_SAVED >- >- L_SAVE_HWREG_WAVE64: >- write_sgpr_to_mem_wave64(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //M0 >- >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) >- s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 >- s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over >- end >- >- write_sgpr_to_mem_wave64(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PC >- write_sgpr_to_mem_wave64(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) >- write_sgpr_to_mem_wave64(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //EXEC >- write_sgpr_to_mem_wave64(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) >- write_sgpr_to_mem_wave64(s_save_status, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //STATUS >- >- //s_save_trapsts conflicts with s_save_alloc_size >- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) >- write_sgpr_to_mem_wave64(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TRAPSTS >- >- //write_sgpr_to_mem_wave64(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_LO >- write_sgpr_to_mem_wave64(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_HI >- >- //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 >- s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE >- write_sgpr_to_mem_wave64(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) >- >- >- if(SAVE_RESTORE_HWID_DDID) >- s_getreg_b32 s_save_m0, hwreg(HW_REG_HW_ID1) //HW_ID1, handler records the SE/SA/WGP/SIMD/wave of the original wave >- write_sgpr_to_mem_wave64(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) >- >- /* save DDID */ >- ////////////////////////////// >- L_SAVE_DDID: >- //EXEC has been saved, no vector inst following >- s_mov_b32 exec_lo, 0x80000000 //Set MSB to 1. Cleared when draw index is returned >- s_sendmsg sendmsg(MSG_GET_DDID) >- >- L_WAIT_DDID_LOOP: >- s_nop 7 // sleep a bit >- s_bitcmp0_b32 exec_lo, 31 // test to see if MSB is cleared, meaning done >- s_cbranch_scc0 L_WAIT_DDID_LOOP >- >- s_mov_b32 s_save_m0, exec_lo >- >- >- s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
>- else
>- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
>- end
>- s_and_b32 m0, s_wave_size, 1
>- s_cmp_eq_u32 m0, 1
>- s_cbranch_scc1 L_SAVE_DDID_WAVE64
>-
>- write_sgpr_to_mem_wave32(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF)
>-
>- L_SAVE_DDID_WAVE64:
>- write_sgpr_to_mem_wave64(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF)
>-
>- end
>-
>- L_S_PGM_END_SAVED:
>- /* S_PGM_END_SAVED */ //FIXME graphics ONLY
>- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
>- s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
>- s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
>- s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
>- s_rfe_b64 s_save_pc_lo //Return to the main shader program
>- else
>- end
>-
>-
>- s_branch L_END_PGM
>-
>-
>-
>-/**************************************************************************/
>-/* restore routine */
>-/**************************************************************************/
>+
>+ /* inform SPI the readiness and wait for SPI's go signal */
>+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
>+ s_mov_b32 s_save_exec_hi, exec_hi
>+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
>+
>+ s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
>+
>+L_SLEEP:
>+ // sleep 1 (64clk) is not enough for 8 waves per SIMD and will cause an
>+ // SQ hang: the 7th/8th waves cannot win arbitration to execute instructions
>+ // while the other waves are stuck in the sleep loop waiting for wrexec!=0
>+ s_sleep 0x2
>+ s_cbranch_execz L_SLEEP
>+
>+ /* setup Resource Constants */
>+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
>+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
>+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
>+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes), although not necessarily initialized
>+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
>+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
>+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
>+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
>+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
>+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
>+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
>+
>+ s_mov_b32 s_save_m0, m0
>+
>+ /* global mem offset */
>+ s_mov_b32 s_save_mem_offset, 0x0
>+ s_getreg_b32 s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
>+ s_lshl_b32 s_wave_size, s_wave_size, S_WAVE_SIZE
>+ s_or_b32 s_wave_size, s_save_spi_init_hi, s_wave_size //share s_wave_size with exec_hi; the wave-size bit sits at bit25
>+
>+ /* save HW registers */
>+
>+L_SAVE_HWREG:
>+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
>+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
>+ get_svgpr_size_bytes(s_save_tmp)
>+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
>+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
>+
>+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
>+
>+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
>+ write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
>+ write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, 
s_save_mem_offset)
>+ write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
>+ write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
>+ write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
>+
>+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
>+ write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)
>+ write_hwreg_to_mem(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset)
>+
>+ s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE)
>+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
>+
>+ s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO)
>+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
>+
>+ s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
>+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
>+
>+ /* the first wave in the threadgroup */
>+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
>+ s_mov_b32 s_save_exec_hi, 0x0
>+ s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
>+
>+ /* save SGPRs */
>+ // Save SGPRs before the LDS save, so that s0 to s4 can be used during the LDS save...
>+
>+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
>+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
>+ get_svgpr_size_bytes(s_save_tmp)
>+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
>+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
>+
>+ // backup s_save_buf_rsrc0 into s_save_xnack_mask, since the write_16sgpr_to_mem function will change rsrc0
>+ s_mov_b32 s_save_xnack_mask, s_save_buf_rsrc0
>+ s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
>+ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
>+
>+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
>+ s_nop 0x0 //Manually inserted wait states
>+L_SAVE_SGPR_LOOP:
>+ // SGPR is allocated in 16 SGPR granularity
>+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
>+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
>+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
>+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
>+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
>+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
>+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
>+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
>+
>+ write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
>+ s_add_u32 m0, m0, 16 //next sgpr index
>+ s_cmp_lt_u32 m0, 96 //scc = (m0 < 96) ? 1 : 0
>+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPRs save is complete?
>+
>+ //save the remaining 12 SGPRs
>+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
>+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
>+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
>+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
>+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
>+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
>+ write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
>+
>+ // restore s_save_buf_rsrc0 from the backup
>+ s_mov_b32 s_save_buf_rsrc0, s_save_xnack_mask
>+
>+ /* save the first 4 VGPRs, so the LDS save below can use them */
>+ // each wave allocates at least 4 VGPRs...
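>+ // Illustrative layout sketch (assumed numbers, not part of the handler):
>+ // from the offset arithmetic above, each wave's save area is laid out as
>+ //   [ VGPR | shared VGPR | SGPR | HWREG | LDS ]
>+ // e.g., assuming a wave64 with 16 VGPRs and no shared VGPRs, the VGPR block
>+ // is 16 * 64 lanes * 4 bytes = 4096 bytes at offset 0, so the SGPR block
>+ // begins at byte 4096 and HWREG follows at 4096 + get_sgpr_size_bytes().
>+ // v0..v3 are written first, at offset 0, so the LDS loop may clobber them.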
>+ >+ s_mov_b32 s_save_mem_offset, 0 >+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on >+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE >+ s_and_b32 m0, m0, 1 >+ s_cmp_eq_u32 m0, 1 >+ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI >+ s_mov_b32 exec_hi, 0x00000000 >+ s_branch L_SAVE_4VGPR_WAVE32 >+L_ENABLE_SAVE_4VGPR_EXEC_HI: >+ s_mov_b32 exec_hi, 0xFFFFFFFF >+ s_branch L_SAVE_4VGPR_WAVE64 >+L_SAVE_4VGPR_WAVE32: >+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >+ >+ // VGPR Allocated in 4-GPR granularity >+ >+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128 >+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2 >+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3 >+ s_branch L_SAVE_LDS >+ >+L_SAVE_4VGPR_WAVE64: >+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >+ >+ // VGPR Allocated in 4-GPR granularity >+ >+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 >+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 >+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 >+ >+ /* save LDS */ >+ >+L_SAVE_LDS: >+ // Change EXEC to all threads... >+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on >+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE >+ s_and_b32 m0, m0, 1 >+ s_cmp_eq_u32 m0, 1 >+ s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI >+ s_mov_b32 exec_hi, 0x00000000 >+ s_branch L_SAVE_LDS_NORMAL >+L_ENABLE_SAVE_LDS_EXEC_HI: >+ s_mov_b32 exec_hi, 0xFFFFFFFF >+L_SAVE_LDS_NORMAL: >+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) >+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? >+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE >+ >+ s_barrier //LDS is used? 
wait for other waves in the same TG
>+ s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
>+ s_cbranch_scc0 L_SAVE_LDS_DONE
>+
>+ // the first wave does the LDS save;
>+
>+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
>+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
>+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
>+
>+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
>+ //
>+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
>+ get_svgpr_size_bytes(s_save_tmp)
>+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
>+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
>+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
>+
>+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
>+
>+ //load 0~63*4(byte address) to vgpr v0
>+ v_mbcnt_lo_u32_b32 v0, -1, 0
>+ v_mbcnt_hi_u32_b32 v0, -1, v0
>+ v_mul_u32_u24 v0, 4, v0
>+
>+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
>+ s_and_b32 m0, m0, 1
>+ s_cmp_eq_u32 m0, 1
>+ s_mov_b32 m0, 0x0
>+ s_cbranch_scc1 L_SAVE_LDS_W64
>+
>+L_SAVE_LDS_W32:
>+ s_mov_b32 s3, 128
>+ s_nop 0
>+ s_nop 0
>+ s_nop 0
>+L_SAVE_LDS_LOOP_W32:
>+ ds_read_b32 v1, v0
>+ s_waitcnt 0
>+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
>+
>+ s_add_u32 m0, m0, s3 //every buffer_store_dword does 128 bytes (s3 = 128)
>+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
>+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
>+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
>+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete?
>+
>+ s_branch L_SAVE_LDS_DONE
>+
>+L_SAVE_LDS_W64:
>+ s_mov_b32 s3, 256
>+ s_nop 0
>+ s_nop 0
>+ s_nop 0
>+L_SAVE_LDS_LOOP_W64:
>+ ds_read_b32 v1, v0
>+ s_waitcnt 0
>+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
>+
>+ s_add_u32 m0, m0, s3 //every buffer_store_dword does 256 bytes (s3 = 256)
>+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
>+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
>+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
>+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete?
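>+ // Stride sketch for the two loops above (arithmetic spelled out for
>+ // clarity, not new logic): with EXEC all ones, one buffer_store_dword
>+ // writes 4 bytes per active lane, i.e. 32 lanes * 4 = 128 bytes in wave32
>+ // and 64 lanes * 4 = 256 bytes in wave64 -- which is why s3 is 128 vs 256
>+ // when stepping m0 and s_save_mem_offset.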
>+ >+L_SAVE_LDS_DONE: >+ /* save VGPRs - set the Rest VGPRs */ >+L_SAVE_VGPR: >+ // VGPR SR memory offset: 0 >+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on >+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE >+ s_and_b32 m0, m0, 1 >+ s_cmp_eq_u32 m0, 1 >+ s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI >+ s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs >+ s_mov_b32 exec_hi, 0x00000000 >+ s_branch L_SAVE_VGPR_NORMAL >+L_ENABLE_SAVE_VGPR_EXEC_HI: >+ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs >+ s_mov_b32 exec_hi, 0xFFFFFFFF >+L_SAVE_VGPR_NORMAL: >+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) >+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 >+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) >+ //determine it is wave32 or wave64 >+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE >+ s_and_b32 m0, m0, 1 >+ s_cmp_eq_u32 m0, 1 >+ s_cbranch_scc1 L_SAVE_VGPR_WAVE64 >+ >+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >+ >+ // VGPR Allocated in 4-GPR granularity >+ >+ // VGPR store using dw burst >+ s_mov_b32 m0, 0x4 //VGPR initial index value =4 >+ s_cmp_lt_u32 m0, s_save_alloc_size >+ s_cbranch_scc0 L_SAVE_VGPR_END >+ >+L_SAVE_VGPR_W32_LOOP: >+ v_movrels_b32 v0, v0 //v0 = v[0+m0] >+ v_movrels_b32 v1, v1 //v1 = v[1+m0] >+ v_movrels_b32 v2, v2 //v2 = v[2+m0] >+ v_movrels_b32 v3, v3 //v3 = v[3+m0] >+ >+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128 >+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2 >+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3 >+ >+ s_add_u32 m0, m0, 4 //next vgpr index >+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes >+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 >+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete? >+ >+ s_branch L_SAVE_VGPR_END >+ >+L_SAVE_VGPR_WAVE64: >+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >+ >+ // VGPR store using dw burst >+ s_mov_b32 m0, 0x4 //VGPR initial index value =4 >+ s_cmp_lt_u32 m0, s_save_alloc_size >+ s_cbranch_scc0 L_SAVE_VGPR_END >+ >+L_SAVE_VGPR_W64_LOOP: >+ v_movrels_b32 v0, v0 //v0 = v[0+m0] >+ v_movrels_b32 v1, v1 //v1 = v[1+m0] >+ v_movrels_b32 v2, v2 //v2 = v[2+m0] >+ v_movrels_b32 v3, v3 //v3 = v[3+m0] >+ >+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 >+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 >+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 >+ >+ s_add_u32 m0, m0, 4 //next vgpr index >+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes >+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 >+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete? >+ >+ //Below part will be the save shared vgpr part (new for gfx10) >+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) >+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? 
>+ s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_LDS >+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) >+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. >+ //save shared_vgpr will start from the index of m0 >+ s_add_u32 s_save_alloc_size, s_save_alloc_size, m0 >+ s_mov_b32 exec_lo, 0xFFFFFFFF >+ s_mov_b32 exec_hi, 0x00000000 >+L_SAVE_SHARED_VGPR_WAVE64_LOOP: >+ v_movrels_b32 v0, v0 //v0 = v[0+m0] >+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >+ s_add_u32 m0, m0, 1 //next vgpr index >+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 >+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 >+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete? >+ >+L_SAVE_VGPR_END: >+ s_branch L_END_PGM > > L_RESTORE: >- /* Setup Resource Contants */ >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) >- //calculate wd_addr using absolute thread id >- v_readlane_b32 s_restore_tmp, v9, 0 >- //determine it is wave32 or wave64 >- s_getreg_b32 s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) //change to ttmp13 >- s_cmp_eq_u32 s_restore_size, 0 >- s_cbranch_scc1 L_RESTORE_WAVE32 >- s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 //SAVE WAVE64 >- s_branch L_RESTORE_CON >- L_RESTORE_WAVE32: >- s_lshr_b32 s_restore_tmp, s_restore_tmp, 5 //SAVE WAVE32 >- L_RESTORE_CON: >- s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE >- s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO >- s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI >- s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL >- else >- end >- >- s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo >- s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi >- s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE >- s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) >- s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC >- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK >- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position >- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC >- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK >- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position >- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE >- //determine it is wave32 or wave64 >- s_getreg_b32 s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) >- s_or_b32 s_restore_size, s_restore_spi_init_hi, s_restore_size //share s_wave_size with exec_hi >- >- /* global mem offset */ >- s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 >- >- /* restore VGPRs */ >- ////////////////////////////// >- L_RESTORE_VGPR: >- >- s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead >- s_and_b32 m0, s_restore_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI >- s_mov_b32 exec_hi, 0x00000000 >- s_branch L_RESTORE_VGPR_NORMAL >- L_ENABLE_RESTORE_VGPR_EXEC_HI: >- s_mov_b32 
exec_hi, 0xFFFFFFFF >- L_RESTORE_VGPR_NORMAL: >- s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size >- s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 >- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) >- //determine it is wave32 or wave64 >- s_and_b32 m0, s_restore_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_RESTORE_VGPR_WAVE64 >- >- s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 7 //NUM_RECORDS in bytes (32 threads*4) >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else >- s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last >- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 >- s_mov_b32 m0, 1 //VGPR initial index value = 1 >- //s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 >- //s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later, might not need this in gfx10 >- >- L_RESTORE_VGPR_WAVE32_LOOP: >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else >- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 >- end >- s_waitcnt vmcnt(0) //ensure data ready >- v_movreld_b32 v0, v0 //v[0+m0] = v0 >- s_add_u32 m0, m0, 1 //next vgpr index >- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //every buffer_load_dword does 128 bytes >- s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete? >- //s_set_gpr_idx_off >- /* VGPR restore on v0 */ >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else >- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 >- end >- >- s_branch L_RESTORE_LDS >- >- L_RESTORE_VGPR_WAVE64: >- s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else >- s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last >- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 >- s_mov_b32 m0, 1 //VGPR initial index value = 1 >- L_RESTORE_VGPR_WAVE64_LOOP: >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else >- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 >- end >- s_waitcnt vmcnt(0) //ensure data ready >- v_movreld_b32 v0, v0 //v[0+m0] = v0 >- s_add_u32 m0, m0, 1 //next vgpr index >- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //every buffer_load_dword does 256 bytes >- s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 
1 : 0 >- s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? >- //s_set_gpr_idx_off >- // >- //Below part will be the restore shared vgpr part (new for gfx10) >- s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size >- s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? >- s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used? jump to L_SAVE_LDS >- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) >- //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. >- //restore shared_vgpr will start from the index of m0 >- s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0 >- s_mov_b32 exec_lo, 0xFFFFFFFF >- s_mov_b32 exec_hi, 0x00000000 >- L_RESTORE_SHARED_VGPR_WAVE64_LOOP: >- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 >- s_waitcnt vmcnt(0) //ensure data ready >- v_movreld_b32 v0, v0 //v[0+m0] = v0 >- s_add_u32 m0, m0, 1 //next vgpr index >- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //every buffer_load_dword does 256 bytes >- s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? >- >- s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!! >- >- /* VGPR restore on v0 */ >- L_RESTORE_V0: >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else >- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 >- end >- >- >- /* restore LDS */ >- ////////////////////////////// >- L_RESTORE_LDS: >- >- //Only need to check the first wave >- /* the first wave in the threadgroup */ >- s_and_b32 s_restore_tmp, s_restore_size, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK >- s_cbranch_scc0 L_RESTORE_SGPR >- >- s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead >- s_and_b32 m0, s_restore_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI >- s_mov_b32 exec_hi, 0x00000000 >- s_branch L_RESTORE_LDS_NORMAL >- L_ENABLE_RESTORE_LDS_EXEC_HI: >- s_mov_b32 exec_hi, 0xFFFFFFFF >- L_RESTORE_LDS_NORMAL: >- s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size >- s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? >- s_cbranch_scc0 L_RESTORE_SGPR //no lds used? jump to L_RESTORE_VGPR >- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw >- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes >- s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
>- else >- s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_and_b32 m0, s_wave_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_mov_b32 m0, 0x0 >- s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 >- >- L_RESTORE_LDS_LOOP_W32: >- if (SAVE_LDS) >- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 >- s_waitcnt 0 >- end >- s_add_u32 m0, m0, 128 //every buffer_load_dword does 256 bytes >- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 256 bytes >- s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete? >- s_branch L_RESTORE_SGPR >- >- L_RESTORE_LDS_LOOP_W64: >- if (SAVE_LDS) >- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 >- s_waitcnt 0 >- end >- s_add_u32 m0, m0, 256 //every buffer_load_dword does 256 bytes >- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes >- s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete? >- >- >- /* restore SGPRs */ >- ////////////////////////////// >- //s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size >- //s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 >- //s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) >- //s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SGPRs = (sgpr_size + 1) * 8 (non-zero value) >- L_RESTORE_SGPR: >- //need to look at it is wave32 or wave64 >- s_and_b32 m0, s_restore_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_RESTORE_SGPR_VMEM_WAVE64 >- if (SGPR_SAVE_USE_SQC) >- s_lshl_b32 s_restore_buf_rsrc2, s_sgpr_save_num, 2 //NUM_RECORDS in bytes >- else >- s_lshl_b32 s_restore_buf_rsrc2, s_sgpr_save_num, 7 //NUM_RECORDS in bytes (32 threads) >- end >- s_branch L_RESTORE_SGPR_CONT >- L_RESTORE_SGPR_VMEM_WAVE64: >- if (SGPR_SAVE_USE_SQC) >- s_lshl_b32 s_restore_buf_rsrc2, s_sgpr_save_num, 2 //NUM_RECORDS in bytes >- else >- s_lshl_b32 s_restore_buf_rsrc2, s_sgpr_save_num, 8 //NUM_RECORDS in bytes (64 threads) >- end >- >- L_RESTORE_SGPR_CONT: >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else >- s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_and_b32 m0, s_restore_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_RESTORE_SGPR_WAVE64 >- >- read_sgpr_from_mem_wave32(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //save s0 to s_restore_tmp >- s_mov_b32 m0, 0x1 >- >- L_RESTORE_SGPR_LOOP_WAVE32: >- read_sgpr_from_mem_wave32(s0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PV: further performance improvement can be made >- s_waitcnt lgkmcnt(0) //ensure data ready >- s_movreld_b32 s0, s0 //s[0+m0] = s0 >- s_nop 0 // hazard SALU M0=> S_MOVREL >- s_add_u32 m0, m0, 1 //next sgpr index >- s_cmp_lt_u32 m0, s_sgpr_save_num //scc = (m0 < s_restore_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_RESTORE_SGPR_LOOP_WAVE32 //SGPR restore (except s0) is complete? 
>- s_mov_b32 s0, s_restore_tmp /* SGPR restore on s0 */ >- s_branch L_RESTORE_HWREG >- >- L_RESTORE_SGPR_WAVE64: >- read_sgpr_from_mem_wave64(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //save s0 to s_restore_tmp >- s_mov_b32 m0, 0x1 //SGPR initial index value =1 //go on with with s1 >- >- L_RESTORE_SGPR_LOOP_WAVE64: >- read_sgpr_from_mem_wave64(s0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PV: further performance improvement can be made >- s_waitcnt lgkmcnt(0) //ensure data ready >- s_movreld_b32 s0, s0 //s[0+m0] = s0 >- s_nop 0 // hazard SALU M0=> S_MOVREL >- s_add_u32 m0, m0, 1 //next sgpr index >- s_cmp_lt_u32 m0, s_sgpr_save_num //scc = (m0 < s_restore_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_RESTORE_SGPR_LOOP_WAVE64 //SGPR restore (except s0) is complete? >- s_mov_b32 s0, s_restore_tmp /* SGPR restore on s0 */ >- >- >- /* restore HW registers */ >- ////////////////////////////// >- L_RESTORE_HWREG: >- s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else >- s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_and_b32 m0, s_restore_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_RESTORE_HWREG_WAVE64 >- >- read_sgpr_from_mem_wave32(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //M0 >- read_sgpr_from_mem_wave32(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PC >- read_sgpr_from_mem_wave32(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) >- read_sgpr_from_mem_wave32(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //EXEC >- read_sgpr_from_mem_wave32(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) >- read_sgpr_from_mem_wave32(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //STATUS >- read_sgpr_from_mem_wave32(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TRAPSTS >- //read_sgpr_from_mem_wave32(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_LO >- //read_sgpr_from_mem_wave32(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_HI >- read_sgpr_from_mem_wave32(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK >- read_sgpr_from_mem_wave32(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //MODE >- if(SAVE_RESTORE_HWID_DDID) >- read_sgpr_from_mem_wave32(s_restore_hwid1, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //HW_ID1 >- end >- s_branch L_RESTORE_HWREG_FINISH >- >- L_RESTORE_HWREG_WAVE64: >- read_sgpr_from_mem_wave64(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //M0 >- read_sgpr_from_mem_wave64(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PC >- read_sgpr_from_mem_wave64(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) >- read_sgpr_from_mem_wave64(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //EXEC >- read_sgpr_from_mem_wave64(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) >- read_sgpr_from_mem_wave64(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //STATUS >- 
read_sgpr_from_mem_wave64(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TRAPSTS >- //read_sgpr_from_mem_wave64(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_LO >- //read_sgpr_from_mem_wave64(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_HI >- read_sgpr_from_mem_wave64(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK >- read_sgpr_from_mem_wave64(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //MODE >- if(SAVE_RESTORE_HWID_DDID) >- read_sgpr_from_mem_wave64(s_restore_hwid1, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //HW_ID1 >- end >- L_RESTORE_HWREG_FINISH: >- s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS >- >- >- >- if(SAVE_RESTORE_HWID_DDID) >- L_RESTORE_DDID: >- s_mov_b32 m0, s_restore_hwid1 //virture ttrace support: The save-context handler records the SE/SA/WGP/SIMD/wave of the original wave >- s_ttracedata //and then can output it as SHADER_DATA to ttrace on restore to provide a correlation across the save-restore >- >- s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else >- s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- s_and_b32 m0, s_restore_size, 1 >- s_cmp_eq_u32 m0, 1 >- s_cbranch_scc1 L_RESTORE_DDID_WAVE64 >- >- read_sgpr_from_mem_wave32(s_restore_ddid, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) >- s_branch L_RESTORE_DDID_FINISH >- L_RESTORE_DDID_WAVE64: >- read_sgpr_from_mem_wave64(s_restore_ddid, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) >- >- L_RESTORE_DDID_FINISH: >- s_waitcnt lgkmcnt(0) >- //s_mov_b32 m0, s_restore_ddid >- //s_ttracedata >- if (RESTORE_DDID_IN_SGPR18) >- s_mov_b32 s18, s_restore_ddid >- end >- >- end >- >- s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS >- >- //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) >- s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) >- s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over >- end >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) >- s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal >- s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over >- end >- >- s_mov_b32 m0, s_restore_m0 >- s_mov_b32 exec_lo, s_restore_exec_lo >- s_mov_b32 exec_hi, s_restore_exec_hi >- >- s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts >+ /* Setup Resource Contants */ >+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo >+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi >+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE >+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) >+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC >+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK >+ s_lshr_b32 s_restore_tmp, s_restore_tmp, 
(S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
>+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
>+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
>+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
>+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
>+ //determine whether it is wave32 or wave64
>+ s_getreg_b32 s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
>+ s_lshl_b32 s_restore_size, s_restore_size, S_WAVE_SIZE
>+ s_or_b32 s_restore_size, s_restore_spi_init_hi, s_restore_size
>+
>+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
>+ s_cbranch_scc0 L_RESTORE_VGPR
>+
>+ /* restore LDS */
>+L_RESTORE_LDS:
>+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
>+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
>+ s_and_b32 m0, m0, 1
>+ s_cmp_eq_u32 m0, 1
>+ s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI
>+ s_mov_b32 exec_hi, 0x00000000
>+ s_branch L_RESTORE_LDS_NORMAL
>+L_ENABLE_RESTORE_LDS_EXEC_HI:
>+ s_mov_b32 exec_hi, 0xFFFFFFFF
>+L_RESTORE_LDS_NORMAL:
>+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
>+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
>+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
>+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
>+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
>+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
>+
>+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
>+ //
>+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
>+ get_svgpr_size_bytes(s_restore_tmp)
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
>+
>+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
>+
>+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
>+ s_and_b32 m0, m0, 1
>+ s_cmp_eq_u32 m0, 1
>+ s_mov_b32 m0, 0x0
>+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
>+
>+L_RESTORE_LDS_LOOP_W32:
>+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // 32 DW (128 bytes) straight into LDS
>+ s_add_u32 m0, m0, 128 //advance 128 bytes
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128 bytes
>+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
>+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete?
>+ s_branch L_RESTORE_VGPR
>+
>+L_RESTORE_LDS_LOOP_W64:
>+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // 64 DW (256 bytes) straight into LDS
>+ s_add_u32 m0, m0, 256 //advance 256 bytes
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes
>+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
>+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete?
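>+ // Note on the restore loops above: buffer_load_dword with lds:1 returns
>+ // the fetched dwords directly into LDS rather than a VGPR, so no ds_write
>+ // is needed on restore; m0 supplies the LDS destination address and hence
>+ // advances by one full wavefront of dwords (128 or 256 bytes) per pass.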
>+ >+ /* restore VGPRs */ >+L_RESTORE_VGPR: >+ // VGPR SR memory offset : 0 >+ s_mov_b32 s_restore_mem_offset, 0x0 >+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on >+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE >+ s_and_b32 m0, m0, 1 >+ s_cmp_eq_u32 m0, 1 >+ s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI >+ s_mov_b32 exec_hi, 0x00000000 >+ s_branch L_RESTORE_VGPR_NORMAL >+L_ENABLE_RESTORE_VGPR_EXEC_HI: >+ s_mov_b32 exec_hi, 0xFFFFFFFF >+L_RESTORE_VGPR_NORMAL: >+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) >+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 >+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) >+ //determine it is wave32 or wave64 >+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE >+ s_and_b32 m0, m0, 1 >+ s_cmp_eq_u32 m0, 1 >+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64 >+ >+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >+ >+ // VGPR load using dw burst >+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last >+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 >+ s_mov_b32 m0, 4 //VGPR initial index value = 4 >+ >+L_RESTORE_VGPR_WAVE32_LOOP: >+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 >+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128 >+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*2 >+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*3 >+ s_waitcnt vmcnt(0) >+ v_movreld_b32 v0, v0 //v[0+m0] = v0 >+ v_movreld_b32 v1, v1 >+ v_movreld_b32 v2, v2 >+ v_movreld_b32 v3, v3 >+ s_add_u32 m0, m0, 4 //next vgpr index >+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes >+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 >+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete? 
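>+ // Worked pass of the wave32 loop above (illustrative): with m0 = 4 the
>+ // four loads fetch the saved images of v4..v7 from s_restore_mem_offset +
>+ // 0/128/256/384 and v_movreld writes them into v[4]..v[7]; m0 steps to 8.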
>+
>+ /* VGPR restore on v0 */
>+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
>+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
>+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
>+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
>+
>+ s_branch L_RESTORE_SGPR
>+
>+L_RESTORE_VGPR_WAVE64:
>+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
>+
>+ // VGPR load using dw burst
>+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore starts with v4; v0-v3 will be the last
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
>+ s_mov_b32 m0, 4 //VGPR initial index value = 4
>+
>+L_RESTORE_VGPR_WAVE64_LOOP:
>+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
>+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
>+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
>+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
>+ s_waitcnt vmcnt(0)
>+ v_movreld_b32 v0, v0 //v[0+m0] = v0
>+ v_movreld_b32 v1, v1
>+ v_movreld_b32 v2, v2
>+ v_movreld_b32 v3, v3
>+ s_add_u32 m0, m0, 4 //next vgpr index
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
>+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
>+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
>+
>+ //below is the shared-VGPR restore (new for gfx10)
>+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
>+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
>+ s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used?
>+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
>+ //m0 now holds the normal vgpr count; add the shared_vgpr count to it to get the total count.
>+ //shared_vgpr restore starts from the index in m0
>+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0
>+ s_mov_b32 exec_lo, 0xFFFFFFFF
>+ s_mov_b32 exec_hi, 0x00000000
>+L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
>+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
>+ s_waitcnt vmcnt(0)
>+ v_movreld_b32 v0, v0 //v[0+m0] = v0
>+ s_add_u32 m0, m0, 1 //next vgpr index
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
>+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
>+ s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //shared_vgpr restore is complete?
>+
>+ s_mov_b32 exec_hi, 0xFFFFFFFF //restore exec_hi before restoring v0!!
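Shared VGPRs are new in gfx10 wave64: the high half of the wave is masked off (exec_hi = 0), so each shared VGPR is 32 lanes wide and the loop advances the memory offset by 128 bytes per register. Rough sizing, mirroring the get_svgpr_size_bytes() helper added later in this patch:

    def shared_vgpr_bytes(shared_vgpr_size_field):
        count = shared_vgpr_size_field * 8   # the s_lshl_b32 ..., 3 above
        return count * 32 * 4                # 32 lanes * 4 bytes per register

    assert shared_vgpr_bytes(1) == 1 << (3 + 7)   # matches get_svgpr_size_bytes()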
>+
>+ /* VGPR restore on v0 */
>+L_RESTORE_V0:
>+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
>+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
>+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
>+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
>+
>+ /* restore SGPRs */
>+ //will be 4+8+16*6
>+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
>+L_RESTORE_SGPR:
>+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
>+ get_svgpr_size_bytes(s_restore_tmp)
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
>+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 are not saved
>+
>+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
>+
>+ s_mov_b32 m0, s_sgpr_save_num
>+
>+ read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ s_waitcnt lgkmcnt(0)
>+
>+ s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
>+ s_nop 0 // hazard SALU M0=> S_MOVREL
>+
>+ s_movreld_b64 s0, s0 //s[0+m0] = s0
>+ s_movreld_b64 s2, s2
>+
>+ read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ s_waitcnt lgkmcnt(0)
>+
>+ s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
>+ s_nop 0 // hazard SALU M0=> S_MOVREL
>+
>+ s_movreld_b64 s0, s0 //s[0+m0] = s0
>+ s_movreld_b64 s2, s2
>+ s_movreld_b64 s4, s4
>+ s_movreld_b64 s6, s6
>+
>+ L_RESTORE_SGPR_LOOP:
>+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ s_waitcnt lgkmcnt(0)
>+
>+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
>+ s_nop 0 // hazard SALU M0=> S_MOVREL
>+
>+ s_movreld_b64 s0, s0 //s[0+m0] = s0
>+ s_movreld_b64 s2, s2
>+ s_movreld_b64 s4, s4
>+ s_movreld_b64 s6, s6
>+ s_movreld_b64 s8, s8
>+ s_movreld_b64 s10, s10
>+ s_movreld_b64 s12, s12
>+ s_movreld_b64 s14, s14
>+
>+ s_cmp_eq_u32 m0, 0 //scc = (m0 == 0) ? 1 : 0
>+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP
>+
>+ /* restore HW registers */
>+L_RESTORE_HWREG:
>+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
>+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
>+ get_svgpr_size_bytes(s_restore_tmp)
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
>+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
>+
>+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
>+
>+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ s_waitcnt lgkmcnt(0)
>+
>+ s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s_restore_flat_scratch
>+
>+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
>+ s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
>+
>+ s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
>+
>+ s_mov_b32 s_restore_tmp, s_restore_pc_hi
>+ s_and_b32 s_restore_pc_hi, s_restore_tmp, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
>+
>+ s_mov_b32 m0, s_restore_m0
>+ s_mov_b32 exec_lo, s_restore_exec_lo
>+ s_mov_b32 exec_hi, s_restore_exec_hi
>+
>+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
> s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
>- s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask //restore xnack_mask
>- s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
>- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
>+ s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
>+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
>+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
> s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
>- //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
>- s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
>- //reuse s_restore_m0 as a temp register
>- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
>- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
>- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
>- s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
>- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
>- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
>- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
>- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
>- s_or_b32 s_restore_tmp,
s_restore_tmp, s_restore_m0
>- s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
>- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
>- s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
>- s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status
>-
>- s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG //FIXME not performance-optimal at this time
>-
>-
>-// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
>- s_rfe_b64 s_restore_pc_lo // s_restore_m0[0] is used to set STATUS.inst_atc
>-
>-
>-/**************************************************************************/
>-/* the END */
>-/**************************************************************************/
>-L_END_PGM:
>+ s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
>+ s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_RCNT_MASK
>+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
>+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
>+ s_mov_b32 s_restore_mode, 0x0
>+ s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
>+ s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_FIRST_REPLAY_MASK
>+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
>+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
>+ s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
>+ s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_REPLAY_W64H_MASK
>+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
>+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
>+ s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
>+
>+ s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
>+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
>+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_mode
>+
>+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
>+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
>+ s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which was changed by the previous SALU
>+
>+ s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG
>+
>+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
>+
>+L_END_PGM:
> s_endpgm
>-
>-end
>+end
>+
>+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
>+ s_mov_b32 exec_lo, m0
>+ s_mov_b32 m0, s_mem_offset
>+ s_buffer_store_dword s, s_rsrc, m0 glc:1
>+ s_add_u32 s_mem_offset, s_mem_offset, 4
>+ s_mov_b32 m0, exec_lo
>+end
>+
>+
>+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
>+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
>+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
>+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
>+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
>+ s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
>+ s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
>+end
>+
>+function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
>+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
>+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
>+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
>+ s_add_u32 s_rsrc[0], s_rsrc[0], 4*12
>+ s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
>+end
>+
>+
>+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
>+ s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
>+ s_add_u32 s_mem_offset, s_mem_offset, 4
>+end
>+
>+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
>+ s_sub_u32 s_mem_offset,
s_mem_offset, 4*16
>+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1
>+end
>+
>+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
>+ s_sub_u32 s_mem_offset, s_mem_offset, 4*8
>+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset glc:1
>+end
>+
>+function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
>+ s_sub_u32 s_mem_offset, s_mem_offset, 4*4
>+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset glc:1
>+end
>
>
>-/**************************************************************************/
>-/* the helper functions */
>-/**************************************************************************/
>-function write_sgpr_to_mem_wave32(s, s_rsrc, s_mem_offset, use_sqc, use_mtbuf)
>- if (use_sqc)
>- s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
>- s_mov_b32 m0, s_mem_offset
>- s_buffer_store_dword s, s_rsrc, m0 glc:1
>- s_add_u32 s_mem_offset, s_mem_offset, 4
>- s_mov_b32 m0, exec_lo
>- elsif (use_mtbuf)
>- v_mov_b32 v0, s
>- tbuffer_store_format_x v0, v0, s_rsrc, s_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
>- s_add_u32 s_mem_offset, s_mem_offset, 128
>- else
>- v_mov_b32 v0, s
>- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
>- s_add_u32 s_mem_offset, s_mem_offset, 128
>- end
>+function get_lds_size_bytes(s_lds_size_byte)
>+ s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
>+ s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in bytes = lds_size * 64 DW * 4 bytes // granularity 64DW
> end
>
>-function write_sgpr_to_mem_wave64(s, s_rsrc, s_mem_offset, use_sqc, use_mtbuf)
>- if (use_sqc)
>- s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
>- s_mov_b32 m0, s_mem_offset
>- s_buffer_store_dword s, s_rsrc, m0 glc:1
>- s_add_u32 s_mem_offset, s_mem_offset, 4
>- s_mov_b32 m0, exec_lo
>- elsif (use_mtbuf)
>- v_mov_b32 v0, s
>- tbuffer_store_format_x v0, v0, s_rsrc, s_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
>- s_add_u32 s_mem_offset, s_mem_offset, 256
>- else
>- v_mov_b32 v0, s
>- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
>- s_add_u32 s_mem_offset, s_mem_offset, 256
>- end
>+function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
>+ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
>+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
>+ s_lshr_b32 m0, s_size, S_WAVE_SIZE
>+ s_and_b32 m0, m0, 1
>+ s_cmp_eq_u32 m0, 1
>+ s_cbranch_scc1 L_ENABLE_SHIFT_W64
>+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //VGPR size in bytes = (vgpr_size + 1) * 4 regs * 32 lanes * 4 bytes (non-zero value)
>+ s_branch L_SHIFT_DONE
>+L_ENABLE_SHIFT_W64:
>+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //VGPR size in bytes = (vgpr_size + 1) * 4 regs * 64 lanes * 4 bytes (non-zero value)
>+L_SHIFT_DONE:
> end
>
>-function read_sgpr_from_mem_wave32(s, s_rsrc, s_mem_offset, use_sqc)
>- s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
>- if (use_sqc)
>- s_add_u32 s_mem_offset, s_mem_offset, 4
>- else
>- s_add_u32 s_mem_offset, s_mem_offset, 128
>- end
>+function get_svgpr_size_bytes(s_svgpr_size_byte)
>+ s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
>+ s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7)
> end
>
>-function read_sgpr_from_mem_wave64(s, s_rsrc, s_mem_offset, use_sqc)
>- s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
>- if (use_sqc)
>-
s_add_u32 s_mem_offset, s_mem_offset, 4 >- else >- s_add_u32 s_mem_offset, s_mem_offset, 256 >- end >+function get_sgpr_size_bytes >+ return 512 > end > >+function get_hwreg_size_bytes >+ return 128 >+end >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm 2019-08-31 15:01:11.853736168 -0500 >@@ -24,78 +24,6 @@ > * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex > */ > >-/* HW (VI) source code for CWSR trap handler */ >-/* Version 18 + multiple trap handler */ >- >-// this performance-optimal version was originally from Seven Xu at SRDC >- >-// Revison #18 --... >-/* Rev History >-** #1. Branch from gc dv. //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) >-** #4. SR Memory Layout: >-** 1. VGPR-SGPR-HWREG-{LDS} >-** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. >-** #5. Update: 1. Accurate g8sr_ts_save_d timestamp >-** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) >-** #7. Update: 1. don't barrier if noLDS >-** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version >-** 2. Fix SQ issue by s_sleep 2 >-** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last >-** 2. optimize s_buffer save by burst 16sgprs... >-** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. >-** #11. Update 1. Add 2 more timestamp for debug version >-** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance >-** #13. Integ 1. Always use MUBUF for PV trap shader... >-** #14. Update 1. s_buffer_store soft clause... >-** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. >-** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree >-** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] >-** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... >-** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 >-** 2. FUNC - Handle non-CWSR traps >-*/ >- >-var G8SR_WDMEM_HWREG_OFFSET = 0 >-var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes >- >-// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. 
>- >-var G8SR_DEBUG_TIMESTAMP = 0 >-var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset >-var s_g8sr_ts_save_s = s[34:35] // save start >-var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi >-var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ >-var s_g8sr_ts_save_d = s[40:41] // save end >-var s_g8sr_ts_restore_s = s[42:43] // restore start >-var s_g8sr_ts_restore_d = s[44:45] // restore end >- >-var G8SR_VGPR_SR_IN_DWX4 = 0 >-var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes >-var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 >- >- >-/*************************************************************************/ >-/* control on how to run the shader */ >-/*************************************************************************/ >-//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) >-var EMU_RUN_HACK = 0 >-var EMU_RUN_HACK_RESTORE_NORMAL = 0 >-var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 >-var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 >-var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK >-var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK >-var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK >-var SAVE_LDS = 1 >-var WG_BASE_ADDR_LO = 0x9000a000 >-var WG_BASE_ADDR_HI = 0x0 >-var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem >-var CTX_SAVE_CONTROL = 0x0 >-var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL >-var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) >-var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write >-var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes >-var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing >- > /**************************************************************************/ > /* variables */ > /**************************************************************************/ >@@ -226,16 +154,7 @@ > type(CS) > > >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore >- //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC >- s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC >- s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. >- s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE >- //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE >- s_branch L_SKIP_RESTORE //NOT restore, SAVE actually >- else > s_branch L_SKIP_RESTORE //NOT restore. 
might be a regular trap or save >- end > > L_JUMP_TO_RESTORE: > s_branch L_RESTORE //restore >@@ -249,7 +168,7 @@ > s_cbranch_scc1 L_SAVE //this is the operation for save > > // ********* Handle non-CWSR traps ******************* >-if (!EMU_RUN_HACK) >+ > /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */ > s_load_dwordx4 [ttmp8,ttmp9,ttmp10, ttmp11], [tma_lo,tma_hi], 0 > s_waitcnt lgkmcnt(0) >@@ -268,7 +187,7 @@ > s_and_b32 ttmp1, ttmp1, 0xFFFF > set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC) > s_rfe_b64 [ttmp0, ttmp1] >-end >+ > // ********* End handling of non-CWSR traps ******************* > > /**************************************************************************/ >@@ -276,12 +195,6 @@ > /**************************************************************************/ > > L_SAVE: >- >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_save_s >- s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? >-end >- > s_mov_b32 s_save_tmp, 0 //clear saveCtx bit > s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit > >@@ -303,16 +216,7 @@ > s_mov_b32 s_save_exec_hi, exec_hi > s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive > >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_sq_save_msg >- s_waitcnt lgkmcnt(0) >-end >- >- if (EMU_RUN_HACK) >- >- else > s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC >- end > > // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. > s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) >@@ -321,36 +225,9 @@ > L_SLEEP: > s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 > >- if (EMU_RUN_HACK) >- >- else > s_cbranch_execz L_SLEEP >- end >- >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_spi_wrexec >- s_waitcnt lgkmcnt(0) >-end > > /* setup Resource Contants */ >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) >- //calculate wd_addr using absolute thread id >- v_readlane_b32 s_save_tmp, v9, 0 >- s_lshr_b32 s_save_tmp, s_save_tmp, 6 >- s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE >- s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO >- s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI >- s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL >- else >- end >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) >- s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO >- s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI >- s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL >- else >- end >- >- > s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo > s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi > s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE >@@ -383,22 +260,10 @@ > > > s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
>- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > > > write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 >- >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) >- s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 >- s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over >- s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO >- s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI >- end >- > write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC > write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) > write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC >@@ -440,18 +305,8 @@ > s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 > s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) > >- if (SGPR_SAVE_USE_SQC) > s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes >- else >- s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) >- end >- >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- > > // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 > //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 >@@ -490,30 +345,14 @@ > s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on > s_mov_b32 exec_hi, 0xFFFFFFFF > >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- > > // VGPR Allocated in 4-GPR granularity > >-if G8SR_VGPR_SR_IN_DWX4 >- // the const stride for DWx4 is 4*4 bytes >- s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes >- >- buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- >- s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes >-else > buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 > buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 > buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 > buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 >-end > > > >@@ -549,64 +388,10 @@ > s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() > > >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- > s_mov_b32 m0, 0x0 //lds_offset initial value = 0 > > >-var LDS_DMA_ENABLE = 0 >-var UNROLL = 0 >-if UNROLL==0 && LDS_DMA_ENABLE==1 >- s_mov_b32 s3, 256*2 >- s_nop 0 >- s_nop 0 >- s_nop 0 >- L_SAVE_LDS_LOOP: >- //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? 
>- if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity >- buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW >- buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW >- end >- >- s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes >- s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes >- s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? >- >-elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss >- // store from higest LDS address to lowest >- s_mov_b32 s3, 256*2 >- s_sub_u32 m0, s_save_alloc_size, s3 >- s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 >- s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... >- s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest >- s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction >- s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc >- s_nop 0 >- s_nop 0 >- s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes >- s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved >- s_add_u32 s0, s0,s_save_alloc_size >- s_addc_u32 s1, s1, 0 >- s_setpc_b64 s[0:1] >- >- >- for var i =0; i< 128; i++ >- // be careful to make here a 64Byte aligned address, which could improve performance... >- buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW >- buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW >- >- if i!=127 >- s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline >- s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 >- end >- end >- >-else // BUFFER_STORE > v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 > v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid > v_mul_i32_i24 v2, v3, 8 // tid*8 >@@ -628,8 +413,6 @@ > // restore rsrc3 > s_mov_b32 s_save_buf_rsrc3, s0 > >-end >- > L_SAVE_LDS_DONE: > > >@@ -647,44 +430,8 @@ > s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 > s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible > s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
>- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- >- // VGPR Allocated in 4-GPR granularity > >-if G8SR_VGPR_SR_IN_DWX4 >- // the const stride for DWx4 is 4*4 bytes >- s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes >- >- s_mov_b32 m0, 4 // skip first 4 VGPRs >- s_cmp_lt_u32 m0, s_save_alloc_size >- s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs >- >- s_set_gpr_idx_on m0, 0x1 // This will change M0 >- s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 >-L_SAVE_VGPR_LOOP: >- v_mov_b32 v0, v0 // v0 = v[0+m0] >- v_mov_b32 v1, v1 >- v_mov_b32 v2, v2 >- v_mov_b32 v3, v3 >- >- >- buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- s_add_u32 m0, m0, 4 >- s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 >- s_cmp_lt_u32 m0, s_save_alloc_size >- s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? >- s_set_gpr_idx_off >-L_SAVE_VGPR_LOOP_END: >- >- s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes >-else > // VGPR store using dw burst > s_mov_b32 m0, 0x4 //VGPR initial index value =0 > s_cmp_lt_u32 m0, s_save_alloc_size >@@ -700,52 +447,18 @@ > v_mov_b32 v2, v2 //v0 = v[0+m0] > v_mov_b32 v3, v3 //v0 = v[0+m0] > >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else > buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 > buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 > buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 > buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 >- end > > s_add_u32 m0, m0, 4 //next vgpr index > s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes > s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 > s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? > s_set_gpr_idx_off >-end > > L_SAVE_VGPR_END: >- >- >- >- >- >- >- /* S_PGM_END_SAVED */ //FIXME graphics ONLY >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) >- s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] >- s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 >- s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over >- s_rfe_b64 s_save_pc_lo //Return to the main shader program >- else >- end >- >-// Save Done timestamp >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_save_d >- // SGPR SR memory offset : size(VGPR) >- get_vgpr_size_bytes(s_save_mem_offset) >- s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET >- s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? >- // Need reset rsrc2?? 
>- s_mov_b32 m0, s_save_mem_offset >- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1 >-end >- >- > s_branch L_END_PGM > > >@@ -756,27 +469,6 @@ > > L_RESTORE: > /* Setup Resource Contants */ >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) >- //calculate wd_addr using absolute thread id >- v_readlane_b32 s_restore_tmp, v9, 0 >- s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 >- s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE >- s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO >- s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI >- s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL >- else >- end >- >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_restore_s >- s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? >- // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case... >- s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0] >- s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored.. >-end >- >- >- > s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo > s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi > s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE >@@ -818,18 +510,12 @@ > s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? > > >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > s_mov_b32 m0, 0x0 //lds_offset initial value = 0 > > L_RESTORE_LDS_LOOP: >- if (SAVE_LDS) > buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW > buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW >- end > s_add_u32 m0, m0, 256*2 // 128 DW > s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW > s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 >@@ -848,40 +534,8 @@ > s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 > s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) > s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > >-if G8SR_VGPR_SR_IN_DWX4 >- get_vgpr_size_bytes(s_restore_mem_offset) >- s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 >- >- // the const stride for DWx4 is 4*4 bytes >- s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes >- >- s_mov_b32 m0, s_restore_alloc_size >- s_set_gpr_idx_on m0, 0x8 // Note.. 
This will change m0 >- >-L_RESTORE_VGPR_LOOP: >- buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 >- s_waitcnt vmcnt(0) >- s_sub_u32 m0, m0, 4 >- v_mov_b32 v0, v0 // v[0+m0] = v0 >- v_mov_b32 v1, v1 >- v_mov_b32 v2, v2 >- v_mov_b32 v3, v3 >- s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 >- s_cmp_eq_u32 m0, 0x8000 >- s_cbranch_scc0 L_RESTORE_VGPR_LOOP >- s_set_gpr_idx_off >- >- s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes >- >-else > // VGPR load using dw burst > s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last > s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 >@@ -890,14 +544,10 @@ > s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later > > L_RESTORE_VGPR_LOOP: >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else > buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 > buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 > buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 > buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 >- end > s_waitcnt vmcnt(0) //ensure data ready > v_mov_b32 v0, v0 //v[0+m0] = v0 > v_mov_b32 v1, v1 >@@ -909,16 +559,10 @@ > s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? > s_set_gpr_idx_off > /* VGPR restore on v0 */ >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else > buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 > buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 > buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 > buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 >- end >- >-end > > /* restore SGPRs */ > ////////////////////////////// >@@ -934,16 +578,8 @@ > s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 > s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) > >- if (SGPR_SAVE_USE_SQC) > s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes >- else >- s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) >- end >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
>- else > s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > > /* If 112 SGPRs ar allocated, 4 sgprs are not used TBA(108,109),TMA(110,111), > However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG >@@ -972,12 +608,6 @@ > ////////////////////////////// > L_RESTORE_HWREG: > >- >-if G8SR_DEBUG_TIMESTAMP >- s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo >- s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi >-end >- > // HWREG SR memory offset : size(VGPR)+size(SGPR) > get_vgpr_size_bytes(s_restore_mem_offset) > get_sgpr_size_bytes(s_restore_tmp) >@@ -985,11 +615,7 @@ > > > s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > > read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 > read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC >@@ -1006,16 +632,6 @@ > > s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS > >- //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) >- s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) >- s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over >- end >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) >- s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal >- s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over >- end >- > s_mov_b32 m0, s_restore_m0 > s_mov_b32 exec_lo, s_restore_exec_lo > s_mov_b32 exec_hi, s_restore_exec_hi >@@ -1048,11 +664,6 @@ > > s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time > >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_restore_d >- s_waitcnt lgkmcnt(0) >-end >- > // s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution > s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc > >diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm >--- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm 2019-08-25 14:01:23.000000000 -0500 >+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm 2019-08-31 15:01:11.853736168 -0500 >@@ -24,76 +24,9 @@ > * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex > */ > >-/* HW (GFX9) source code for CWSR trap handler */ >-/* Version 18 + multiple trap handler */ >- >-// this performance-optimal version was originally from Seven Xu at SRDC >- >-// Revison #18 --... >-/* Rev History >-** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) >-** #4. SR Memory Layout: >-** 1. VGPR-SGPR-HWREG-{LDS} >-** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. >-** #5. Update: 1. 
Accurate g8sr_ts_save_d timestamp >-** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) >-** #7. Update: 1. don't barrier if noLDS >-** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version >-** 2. Fix SQ issue by s_sleep 2 >-** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last >-** 2. optimize s_buffer save by burst 16sgprs... >-** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. >-** #11. Update 1. Add 2 more timestamp for debug version >-** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance >-** #13. Integ 1. Always use MUBUF for PV trap shader... >-** #14. Update 1. s_buffer_store soft clause... >-** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. >-** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree >-** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] >-** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... >-** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 >-** 2. FUNC - Handle non-CWSR traps >-*/ >- >-var G8SR_WDMEM_HWREG_OFFSET = 0 >-var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes >- >-// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. >- >-var G8SR_DEBUG_TIMESTAMP = 0 >-var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset >-var s_g8sr_ts_save_s = s[34:35] // save start >-var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi >-var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ >-var s_g8sr_ts_save_d = s[40:41] // save end >-var s_g8sr_ts_restore_s = s[42:43] // restore start >-var s_g8sr_ts_restore_d = s[44:45] // restore end >- >-var G8SR_VGPR_SR_IN_DWX4 = 0 >-var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes >-var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 >- >- >-/*************************************************************************/ >-/* control on how to run the shader */ >-/*************************************************************************/ >-//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) >-var EMU_RUN_HACK = 0 >-var EMU_RUN_HACK_RESTORE_NORMAL = 0 >-var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 >-var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 >-var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK >-var SAVE_LDS = 1 >-var WG_BASE_ADDR_LO = 0x9000a000 >-var WG_BASE_ADDR_HI = 0x0 >-var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem >-var CTX_SAVE_CONTROL = 0x0 >-var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL >-var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) >-var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write >-var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes >-var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing > var ACK_SQC_STORE = 1 //workaround for suspected 
SQC store bug causing incorrect stores under concurrency >+var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger >+var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised > > /**************************************************************************/ > /* variables */ >@@ -107,6 +40,7 @@ > var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 > var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 > var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 >+var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000 > > var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 > var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 >@@ -127,12 +61,15 @@ > var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 > var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 > var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 >+var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000 > > var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME > var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME > var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 > var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME > >+var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 >+ > var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 > var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 > >@@ -197,13 +134,15 @@ > var s_restore_spi_init_hi = exec_hi > > var s_restore_mem_offset = ttmp12 >+var s_restore_accvgpr_offset = ttmp13 > var s_restore_alloc_size = ttmp3 > var s_restore_tmp = ttmp2 > var s_restore_mem_offset_save = s_restore_tmp //no conflict >+var s_restore_accvgpr_offset_save = ttmp7 > > var s_restore_m0 = s_restore_alloc_size //no conflict > >-var s_restore_mode = ttmp7 >+var s_restore_mode = s_restore_accvgpr_offset_save > > var s_restore_pc_lo = ttmp0 > var s_restore_pc_hi = ttmp1 >@@ -226,20 +165,11 @@ > /* Shader Main*/ > > shader main >- asic(GFX9) >+ asic(DEFAULT) > type(CS) > > >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore >- //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC >- s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC >- s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. >- s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE >- //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE >- s_branch L_SKIP_RESTORE //NOT restore, SAVE actually >- else > s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save >- end > > L_JUMP_TO_RESTORE: > s_branch L_RESTORE //restore >@@ -248,12 +178,29 @@ > > s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC > s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save >+ >+if SINGLE_STEP_MISSED_WORKAROUND >+ // No single step exceptions if MODE.DEBUG_EN=0. >+ s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) >+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK >+ s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND >+ >+ // Second-level trap already handled exception if STATUS.HALT=1. >+ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK >+ >+ // Prioritize single step exception over context save. >+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX. 
>+ s_cbranch_scc0 L_FETCH_2ND_TRAP >+ >+L_NO_SINGLE_STEP_WORKAROUND: >+end >+ > s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) > s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save > s_cbranch_scc1 L_SAVE //this is the operation for save > > // ********* Handle non-CWSR traps ******************* >-if (!EMU_RUN_HACK) >+ > // Illegal instruction is a non-maskable exception which blocks context save. > // Halt the wavefront and return from the trap. > s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK >@@ -330,7 +277,7 @@ > set_status_without_spi_prio(s_save_status, ttmp2) > > s_rfe_b64 [ttmp0, ttmp1] >-end >+ > // ********* End handling of non-CWSR traps ******************* > > /**************************************************************************/ >@@ -338,12 +285,6 @@ > /**************************************************************************/ > > L_SAVE: >- >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_save_s >- s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? >-end >- > s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] > > s_mov_b32 s_save_tmp, 0 //clear saveCtx bit >@@ -365,16 +306,7 @@ > s_mov_b32 s_save_exec_hi, exec_hi > s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive > >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_sq_save_msg >- s_waitcnt lgkmcnt(0) >-end >- >- if (EMU_RUN_HACK) >- >- else > s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC >- end > > // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. > s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) >@@ -383,33 +315,7 @@ > L_SLEEP: > s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 > >- if (EMU_RUN_HACK) >- >- else > s_cbranch_execz L_SLEEP >- end >- >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_spi_wrexec >- s_waitcnt lgkmcnt(0) >-end >- >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) >- //calculate wd_addr using absolute thread id >- v_readlane_b32 s_save_tmp, v9, 0 >- s_lshr_b32 s_save_tmp, s_save_tmp, 6 >- s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE >- s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO >- s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI >- s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL >- else >- end >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) >- s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO >- s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI >- s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL >- else >- end > > // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic > // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 >@@ -459,20 +365,10 @@ > > > s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
>- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > > > write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 >- >- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) >- s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 >- s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over >- end >- > write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC > write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) > write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC >@@ -510,17 +406,9 @@ > s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 > s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) > >- if (SGPR_SAVE_USE_SQC) > s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes >- else >- s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) >- end > >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > > > // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 >@@ -563,30 +451,25 @@ > s_mov_b32 xnack_mask_lo, 0x0 > s_mov_b32 xnack_mask_hi, 0x0 > >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > > > // VGPR Allocated in 4-GPR granularity > >-if G8SR_VGPR_SR_IN_DWX4 >- // the const stride for DWx4 is 4*4 bytes >- s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes >- >- buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- >- s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes >-else >+if SAVE_AFTER_XNACK_ERROR >+ check_if_tcp_store_ok() >+ s_cbranch_scc1 L_SAVE_FIRST_VGPRS_WITH_TCP >+ >+ write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) >+ s_branch L_SAVE_LDS >+ >+L_SAVE_FIRST_VGPRS_WITH_TCP: >+end >+ > buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 > buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 > buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 > buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 >-end > > > >@@ -621,66 +504,34 @@ > s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() > > >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? >- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end > > s_mov_b32 m0, 0x0 //lds_offset initial value = 0 > > >-var LDS_DMA_ENABLE = 0 >-var UNROLL = 0 >-if UNROLL==0 && LDS_DMA_ENABLE==1 >- s_mov_b32 s3, 256*2 >- s_nop 0 >- s_nop 0 >- s_nop 0 >- L_SAVE_LDS_LOOP: >- //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? 
>- if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity >- buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW >- buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW >- end >- >- s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes >- s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes >- s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 >- s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? >- >-elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss >- // store from higest LDS address to lowest >- s_mov_b32 s3, 256*2 >- s_sub_u32 m0, s_save_alloc_size, s3 >- s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 >- s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... >- s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest >- s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction >- s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc >- s_nop 0 >- s_nop 0 >- s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes >- s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved >- s_add_u32 s0, s0,s_save_alloc_size >- s_addc_u32 s1, s1, 0 >- s_setpc_b64 s[0:1] >- >- >- for var i =0; i< 128; i++ >- // be careful to make here a 64Byte aligned address, which could improve performance... >- buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW >- buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW >- >- if i!=127 >- s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline >- s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 >- end >- end >- >-else // BUFFER_STORE > v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 > v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid >+ >+if SAVE_AFTER_XNACK_ERROR >+ check_if_tcp_store_ok() >+ s_cbranch_scc1 L_SAVE_LDS_WITH_TCP >+ >+ v_lshlrev_b32 v2, 2, v3 >+L_SAVE_LDS_LOOP_SQC: >+ ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40 >+ s_waitcnt lgkmcnt(0) >+ >+ write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset) >+ >+ v_add_u32 v2, 0x200, v2 >+ v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size >+ s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC >+ >+ s_branch L_SAVE_LDS_DONE >+ >+L_SAVE_LDS_WITH_TCP: >+end >+ > v_mul_i32_i24 v2, v3, 8 // tid*8 > v_mov_b32 v3, 256*2 > s_mov_b32 m0, 0x10000 >@@ -701,8 +552,6 @@ > // restore rsrc3 > s_mov_b32 s_save_buf_rsrc3, s0 > >-end >- > L_SAVE_LDS_DONE: > > >@@ -720,44 +569,9 @@ > s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 > s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible > s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) >- if (SWIZZLE_EN) >- s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
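[Editor's note] In the replacement LDS save above, the SQC fallback loop makes each lane fetch a pair of dwords with ds_read2_b32 (offset1:0x40 is dword-scaled, so the second read lands 256 bytes above the first) and then advances the address by 0x200: 64 lanes x 2 dwords x 4 bytes = 512 bytes per pass. A C model of that addressing, assuming a 64-lane wave; sqc_write is a hypothetical stand-in for write_vgprs_to_mem_with_sqc:

    #include <stddef.h>
    #include <stdint.h>

    #define WAVE_LANES 64

    /* Each pass covers 128 contiguous dwords (0x200 bytes): lane N reads
     * dword base+N and dword base+N+0x40, mirroring
     * ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40 followed by
     * v_add_u32 v2, 0x200, v2. */
    static void save_lds_via_sqc(const uint32_t *lds, size_t lds_dwords,
                                 void (*sqc_write)(const uint32_t *lanes, size_t n))
    {
            uint32_t v0[WAVE_LANES], v1[WAVE_LANES];

            for (size_t base = 0; base < lds_dwords; base += 0x80) {
                    for (unsigned lane = 0; lane < WAVE_LANES; lane++) {
                            v0[lane] = lds[base + lane];         /* offset0:0    */
                            v1[lane] = lds[base + lane + 0x40];  /* offset1:0x40 */
                    }
                    sqc_write(v0, WAVE_LANES);  /* write_vgprs_to_mem_with_sqc */
                    sqc_write(v1, WAVE_LANES);
            }
    }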
>- else > s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- end >- >- >- // VGPR Allocated in 4-GPR granularity > >-if G8SR_VGPR_SR_IN_DWX4 >- // the const stride for DWx4 is 4*4 bytes >- s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes >- >- s_mov_b32 m0, 4 // skip first 4 VGPRs >- s_cmp_lt_u32 m0, s_save_alloc_size >- s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs >- >- s_set_gpr_idx_on m0, 0x1 // This will change M0 >- s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 >-L_SAVE_VGPR_LOOP: >- v_mov_b32 v0, v0 // v0 = v[0+m0] >- v_mov_b32 v1, v1 >- v_mov_b32 v2, v2 >- v_mov_b32 v3, v3 >- >- >- buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >- s_add_u32 m0, m0, 4 >- s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 >- s_cmp_lt_u32 m0, s_save_alloc_size >- s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? >- s_set_gpr_idx_off >-L_SAVE_VGPR_LOOP_END: > >- s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 >- s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes >-else > // VGPR store using dw burst > s_mov_b32 m0, 0x4 //VGPR initial index value =0 > s_cmp_lt_u32 m0, s_save_alloc_size >@@ -767,57 +581,82 @@ > s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 > s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later > >+if SAVE_AFTER_XNACK_ERROR >+ check_if_tcp_store_ok() >+ s_cbranch_scc1 L_SAVE_VGPR_LOOP >+ >+L_SAVE_VGPR_LOOP_SQC: >+ write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) >+ >+ s_add_u32 m0, m0, 4 >+ s_cmp_lt_u32 m0, s_save_alloc_size >+ s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC >+ >+ s_set_gpr_idx_off >+ s_branch L_SAVE_VGPR_END >+end >+ > L_SAVE_VGPR_LOOP: > v_mov_b32 v0, v0 //v0 = v[0+m0] > v_mov_b32 v1, v1 //v0 = v[0+m0] > v_mov_b32 v2, v2 //v0 = v[0+m0] > v_mov_b32 v3, v3 //v0 = v[0+m0] > >- if(USE_MTBUF_INSTEAD_OF_MUBUF) >- tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 >- else > buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 > buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 > buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 > buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 >- end > > s_add_u32 m0, m0, 4 //next vgpr index > s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes > s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 > s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? 
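[Editor's note] The VGPR loop that survives the cleanup leans on the M0 indexing mode enabled just before it: with s_set_gpr_idx_on, the apparently redundant v_mov_b32 v0, v0 really copies v[m0+0] into v0, so one fixed four-register burst can slide across the whole VGPR file. (The extra 0x1000 added to s_save_alloc_size compensates for the index mode setting M0[15:12] = 0x1, keeping the loop comparison valid.) A C analogy; vgpr_file and store_burst are hypothetical:

    #include <stdint.h>

    #define WAVE_LANES 64

    /* The file is modelled as vgpr_count registers of 64 lanes each; m0
     * starts at 4 because v0..v3 were already written out right after
     * the trap was taken. */
    static void save_remaining_vgprs(const uint32_t *vgpr_file,
                                     unsigned vgpr_count,
                                     void (*store_burst)(const uint32_t *regs4))
    {
            for (unsigned m0 = 4; m0 < vgpr_count; m0 += 4)
                    /* v_mov_b32 v0..v3 with gpr-idx-on: v[n] = v[n + m0] */
                    store_burst(&vgpr_file[m0 * WAVE_LANES]);
    }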
> s_set_gpr_idx_off >-end > > L_SAVE_VGPR_END: > >+if ASIC_TARGET_ARCTURUS >+ // Save ACC VGPRs >+ s_mov_b32 m0, 0x0 //VGPR initial index value =0 >+ s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 > >+if SAVE_AFTER_XNACK_ERROR >+ check_if_tcp_store_ok() >+ s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP > >+L_SAVE_ACCVGPR_LOOP_SQC: >+ for var vgpr = 0; vgpr < 4; ++ vgpr >+ v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0] >+ end >+ >+ write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) > >+ s_add_u32 m0, m0, 4 >+ s_cmp_lt_u32 m0, s_save_alloc_size >+ s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP_SQC > >+ s_set_gpr_idx_off >+ s_branch L_SAVE_ACCVGPR_END >+end > >- /* S_PGM_END_SAVED */ //FIXME graphics ONLY >- if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) >- s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] >- s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 >- s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over >- s_rfe_b64 s_save_pc_lo //Return to the main shader program >- else >+L_SAVE_ACCVGPR_LOOP: >+ for var vgpr = 0; vgpr < 4; ++ vgpr >+ v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0] > end > >-// Save Done timestamp >-if G8SR_DEBUG_TIMESTAMP >- s_memrealtime s_g8sr_ts_save_d >- // SGPR SR memory offset : size(VGPR) >- get_vgpr_size_bytes(s_save_mem_offset) >- s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET >- s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? >- // Need reset rsrc2?? >- s_mov_b32 m0, s_save_mem_offset >- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes >- s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1 >-end >+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 >+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 >+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 >+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 >+ >+ s_add_u32 m0, m0, 4 >+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 >+ s_cmp_lt_u32 m0, s_save_alloc_size >+ s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP >+ s_set_gpr_idx_off > >+L_SAVE_ACCVGPR_EN
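[Editor's note] The ASIC_TARGET_ARCTURUS block above extends the same pattern to the accumulation VGPRs, which buffer_store_dword cannot read directly: each group of four is first copied into ordinary VGPRs with v_accvgpr_read and then written out with the usual burst, or through the SQC fallback after an XNACK error. A C sketch under the same hypothetical helpers as the previous examples:

    #include <stdint.h>
    #include <string.h>

    #define WAVE_LANES 64

    /* acc_file models the ACC VGPRs; staging models v0..v3. The copy
     * into staging mirrors the unrolled v_accvgpr_read v[n], acc[n+m0]
     * loop above. */
    static void save_acc_vgprs(const uint32_t *acc_file, unsigned acc_count,
                               void (*store_burst)(const uint32_t *regs4))
    {
            uint32_t staging[4 * WAVE_LANES];

            for (unsigned m0 = 0; m0 < acc_count; m0 += 4) {
                    memcpy(staging, &acc_file[m0 * WAVE_LANES], sizeof(staging));
                    store_burst(staging);  /* 4x buffer_store_dword, 256 B apart */
            }
    }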